├── .github └── workflows │ └── build.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── LICENSE ├── README.rst ├── aclimatise ├── __init__.py ├── cli.py ├── cli_types.py ├── converter │ ├── __init__.py │ ├── cwl.py │ ├── janis.py │ ├── wdl.py │ └── yml.py ├── execution │ ├── __init__.py │ ├── docker.py │ ├── help.py │ ├── local.py │ └── man.py ├── flag_parser │ ├── __init__.py │ ├── elements.py │ └── parser.py ├── integration.py ├── model.py ├── name_generation.py ├── nlp.py ├── parser.py ├── usage_parser │ ├── __init__.py │ ├── elements.py │ ├── model.py │ └── parser.py └── yaml.py ├── docs ├── Makefile ├── README.md ├── _static │ └── railroad.html ├── api.rst ├── changes.rst ├── cli.rst ├── conf.py ├── grammar.rst ├── index.rst ├── installation.rst ├── make.bat └── model.rst ├── environment.yml ├── setup.cfg ├── setup.py └── test ├── __init__.py ├── conftest.py ├── executors ├── __init__.py ├── test_docker.py ├── test_local.py └── test_man.py ├── flags ├── conftest.py ├── test_bedtools.py ├── test_bwa.py ├── test_bwakit.py ├── test_gth.py ├── test_htseq.py ├── test_pisces.py ├── test_podchecker.py ├── test_samtools.py └── test_singularity.py ├── name_generation ├── __init__.py ├── conftest.py ├── test_batch.py ├── test_case.py ├── test_description.py ├── test_group.py └── test_single_flag.py ├── test_cli.py ├── test_convert.py ├── test_data ├── bedtools.txt ├── bedtools │ ├── bedtools.yml │ ├── bedtools_annotate.yml │ ├── bedtools_bamtobed.yml │ ├── bedtools_bamtofastq.yml │ ├── bedtools_bed12tobed6.yml │ ├── bedtools_bedpetobam.yml │ ├── bedtools_bedtobam.yml │ ├── bedtools_closest.yml │ ├── bedtools_cluster.yml │ ├── bedtools_complement.yml │ ├── bedtools_coverage.yml │ ├── bedtools_expand.yml │ ├── bedtools_fisher.yml │ ├── bedtools_flank.yml │ ├── bedtools_genomecov.yml │ ├── bedtools_getfasta.yml │ ├── bedtools_groupby.yml │ ├── bedtools_igv.yml │ ├── bedtools_intersect.yml │ ├── bedtools_jaccard.yml │ ├── bedtools_links.yml │ ├── 
bedtools_makewindows.yml │ ├── bedtools_map.yml │ ├── bedtools_maskfasta.yml │ ├── bedtools_merge.yml │ ├── bedtools_multicov.yml │ ├── bedtools_multiinter.yml │ ├── bedtools_nuc.yml │ ├── bedtools_overlap.yml │ ├── bedtools_pairtobed.yml │ ├── bedtools_pairtopair.yml │ ├── bedtools_random.yml │ ├── bedtools_reldist.yml │ ├── bedtools_sample.yml │ ├── bedtools_shift.yml │ ├── bedtools_shuffle.yml │ ├── bedtools_slop.yml │ ├── bedtools_sort.yml │ ├── bedtools_spacing.yml │ ├── bedtools_split.yml │ ├── bedtools_subtract.yml │ ├── bedtools_tag.yml │ ├── bedtools_unionbedg.yml │ └── bedtools_window.yml ├── bedtools_closest.txt ├── bedtools_coverage.txt ├── bedtools_multiinter.txt ├── bedtools_random.txt ├── bedtools_spacing.txt ├── bedtools_subtract.txt ├── bedtools_window.txt ├── bowtie2_build.txt ├── bwa.txt ├── bwa_bwt2sa.txt ├── bwa_bwtupdate.txt ├── bwa_index.txt ├── bwa_mem.txt ├── dinosaur.txt ├── gth.txt ├── htseq_count.txt ├── mauve.txt ├── pisces.txt ├── podchecker.txt ├── samtools.txt ├── samtools │ ├── samtools.yml │ ├── samtools_addreplacerg.yml │ ├── samtools_bedcov.yml │ ├── samtools_calmd.yml │ ├── samtools_cat.yml │ ├── samtools_collate.yml │ ├── samtools_depad.yml │ ├── samtools_depth.yml │ ├── samtools_dict.yml │ ├── samtools_faidx.yml │ ├── samtools_fasta.yml │ ├── samtools_fastq.yml │ ├── samtools_fixmate.yml │ ├── samtools_flags.yml │ ├── samtools_flagstat.yml │ ├── samtools_fqidx.yml │ ├── samtools_idxstats.yml │ ├── samtools_index.yml │ ├── samtools_markdup.yml │ ├── samtools_merge.yml │ ├── samtools_mpileup.yml │ ├── samtools_phase.yml │ ├── samtools_quickcheck.yml │ ├── samtools_reheader.yml │ ├── samtools_sort.yml │ ├── samtools_split.yml │ ├── samtools_stats.yml │ ├── samtools_targetcut.yml │ ├── samtools_tview.yml │ └── samtools_view.yml ├── samtools_bedcov.txt ├── samtools_dict.txt ├── samtools_pl.txt ├── samtools_quickcheck.txt └── typeHLA.txt ├── test_explore_e2e.py ├── test_model.py ├── test_parse_e2e.py ├── test_type_inference.py ├── 
test_yaml_dump.py ├── usage └── test_usage.py └── util.py /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test_conda: 7 | # Run tests using conda, which gives us bioinformatics tools installed 8 | runs-on: ubuntu-latest 9 | container: continuumio/miniconda3 10 | steps: 11 | - uses: actions/checkout@v2 12 | 13 | - name: apt dependencies, and fix the `man` pages 14 | run: | 15 | rm /etc/dpkg/dpkg.cfg.d/docker 16 | apt-get update && apt install -y --reinstall man coreutils manpages build-essential git git-man 17 | mandb --create 18 | 19 | - uses: actions/setup-dotnet@v1.7.2 20 | with: 21 | dotnet-version: "2.1.x" 22 | 23 | - name: Tests 24 | run: | 25 | source /root/.bashrc 26 | conda env create 27 | conda activate aclimatise-test 28 | pip install --upgrade pip wheel setuptools 29 | pip install -e .[dev] 30 | python -m spacy download en 31 | pytest --tb=native --log-cli-level INFO 32 | shell: bash 33 | 34 | test_system: 35 | runs-on: ubuntu-latest 36 | 37 | strategy: 38 | matrix: 39 | python-version: [3.6, 3.7, 3.8] 40 | 41 | steps: 42 | - uses: actions/checkout@v2 43 | 44 | - name: Set up Python ${{ matrix.python-version }} 45 | uses: actions/setup-python@v1 46 | with: 47 | python-version: ${{ matrix.python-version }} 48 | 49 | - name: Install dependencies 50 | run: | 51 | pip install --upgrade pip wheel setuptools 52 | pip install -e .[dev] 53 | python -m spacy download en 54 | 55 | - uses: pre-commit/action@v1.0.1 56 | 57 | - name: Test with pytest 58 | run: | 59 | pytest --tb=native 60 | 61 | - name: Test the documentation still builds 62 | run: | 63 | cd docs 64 | make html 65 | 66 | publish: 67 | if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') 68 | needs: [test_system, test_conda] 69 | runs-on: ubuntu-latest 70 | steps: 71 | - uses: actions/checkout@v2 72 | 73 | - name: Set up Python ${{ matrix.python-version }} 
74 | uses: actions/setup-python@v1 75 | with: 76 | python-version: 3.8 77 | 78 | - name: Compile package 79 | run: | 80 | pip install -U wheel setuptools 81 | pip install -e .[dev] 82 | python -m spacy download en 83 | python setup.py sdist bdist_wheel 84 | 85 | - name: Create GitHub Release 86 | id: create_release 87 | uses: actions/create-release@v1 88 | env: 89 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 90 | with: 91 | tag_name: ${{ github.ref }} 92 | release_name: Release ${{ github.ref }} 93 | draft: false 94 | prerelease: false 95 | 96 | - name: Publish package 97 | uses: pypa/gh-action-pypi-publish@master 98 | with: 99 | user: __token__ 100 | password: ${{ secrets.pypi_password }} 101 | 102 | - name: Generate railroad diagram 103 | run: | 104 | # We need an unpublished version of PyParsing for this 105 | pip install -U git+https://github.com/pyparsing/pyparsing.git#egg=pyparsing[diagrams] 106 | aclimatise railroad > docs/_static/railroad.html 107 | 108 | - name: Build the documentation 109 | run: | 110 | cd docs 111 | make html 112 | 113 | - name: Deploy docs 114 | uses: peaceiris/actions-gh-pages@v3 115 | with: 116 | github_token: ${{ secrets.GITHUB_TOKEN }} 117 | publish_dir: docs/_build/html 118 | publish_branch: gh-pages 119 | enable_jekyll: false 120 | 121 | - name: Update Basecamp 122 | uses: peter-evans/repository-dispatch@v1 123 | with: 124 | token: ${{ secrets.REPOACCESSTOKEN }} 125 | event-type: aclimatise-update 126 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 
| share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | env.bak/ 100 | venv.bak/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | .dmypy.json 115 | dmypy.json 116 | 117 | # Pyre type checker 118 | .pyre/ 119 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | known_third_party = WDL,click,cwl_utils,cwltool,inflection,pkg_resources,pyhash,pyparsing,pytest,ruamel,setuptools,spacy,wdlgen,wordsegment,regex,num2words,word2number,psutil,packaging,docker,attr 3 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/timothycrosley/isort 3 | rev: 4.3.21-2 4 | hooks: 5 | - id: isort 6 | types: [python] 7 | args: 8 | - "--multi-line=3" 9 | - "--trailing-comma" 10 | - "--force-grid-wrap=0" 11 | - "--use-parentheses" 12 | - "--line-width=88" 13 | 14 | - repo: https://github.com/psf/black 15 | rev: 20.8b1 16 | hooks: 17 | - id: black 18 | 19 | - repo: https://github.com/pre-commit/mirrors-prettier 20 | rev: v2.1.2 21 | hooks: 22 | - id: prettier 23 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | aCLImatise 2 | *********** 3 | |DOI| 4 | 5 | .. |DOI| image:: https://zenodo.org/badge/DOI/10.1093/bioinformatics/btaa1033.svg 6 | :target: https://doi.org/10.1093/bioinformatics/btaa1033 7 | 8 | For the full documentation, refer to the `Github Pages Website 9 | `_. 10 | 11 | ====================================================================== 12 | 13 | aCLImatise is a Python library and command-line utility for parsing the help output 14 | of a command-line tool and then outputting a description of the tool in a more 15 | structured format, for example a 16 | `Common Workflow Language tool definition `_. 17 | 18 | Currently aCLImatise supports both `CWL `_ and 19 | `WDL `_ outputs, but other formats will be considered in the future, especially pull 20 | requests to support them. 21 | 22 | Please also refer to `The aCLImatise Base Camp `_, which is a database of pre-computed tool definitions 23 | generated by the aCLImatise parser. Most bioinformatics tools have a tool definition already generated in the Base Camp, 24 | so you may not need to run aCLImatise directly. 
25 | 26 | aCLImatise is now published in the journal *Bioinformatics*. You can read the application note here: https://doi.org/10.1093/bioinformatics/btaa1033. 27 | To cite aCLImatise, please use the citation generator provided by the journal. 28 | 29 | Example 30 | ------- 31 | 32 | Lets say you want to create a CWL workflow containing the common Unix ``wc`` (word count) 33 | utility. Running ``wc --help`` returns: 34 | 35 | .. code-block:: 36 | 37 | Usage: wc [OPTION]... [FILE]... 38 | or: wc [OPTION]... --files0-from=F 39 | Print newline, word, and byte counts for each FILE, and a total line if 40 | more than one FILE is specified. A word is a non-zero-length sequence of 41 | characters delimited by white space. 42 | 43 | With no FILE, or when FILE is -, read standard input. 44 | 45 | The options below may be used to select which counts are printed, always in 46 | the following order: newline, word, character, byte, maximum line length. 47 | -c, --bytes print the byte counts 48 | -m, --chars print the character counts 49 | -l, --lines print the newline counts 50 | --files0-from=F read input from the files specified by 51 | NUL-terminated names in file F; 52 | If F is - then read names from standard input 53 | -L, --max-line-length print the maximum display width 54 | -w, --words print the word counts 55 | --help display this help and exit 56 | --version output version information and exit 57 | 58 | GNU coreutils online help: 59 | Full documentation at: 60 | or available locally via: info '(coreutils) wc invocation' 61 | 62 | If you run ``aclimatise explore wc``, which means "parse the wc command and all subcommands", 63 | you'll end up with the following files in your current directory: 64 | 65 | * ``wc.cwl`` 66 | * ``wc.wdl`` 67 | * ``wc.yml`` 68 | 69 | These are representations of the command ``wc`` in 3 different formats. If you look at ``wc.wdl``, you'll see that it 70 | contains a WDL-compatible tool definition for ``wc``: 71 | 72 | .. 
"""
Package entry point for aCLImatise: re-exports the public converters,
executors and helper functions so callers can import them from the top level.
"""
import typing

from aclimatise.converter import WrapperGenerator
from aclimatise.converter.cwl import CwlGenerator
from aclimatise.converter.janis import JanisGenerator
from aclimatise.converter.wdl import WdlGenerator
from aclimatise.converter.yml import YmlGenerator
from aclimatise.execution import Executor
from aclimatise.execution.docker import DockerExecutor
from aclimatise.execution.local import LocalExecutor
from aclimatise.execution.man import ManPageExecutor
from aclimatise.integration import parse_help
from aclimatise.model import Command, Flag
from deprecated import deprecated

# Default executor used by explore_command when the caller does not supply one.
default_executor = LocalExecutor()


@deprecated(
    reason="Please use the explore method on the executors directly. e.g. `LocalExecutor().explore()`"
)
def explore_command(
    cmd: typing.List[str],
    flags: typing.Iterable[str] = (["--help"], ["-h"], [], ["--usage"]),
    parent: typing.Optional[Command] = None,
    max_depth: int = 2,
    try_subcommand_flags: bool = True,
    executor: Executor = default_executor,
) -> typing.Optional[Command]:
    """
    Given a command to start with, builds a model of this command and all its subcommands (if they exist).
    Use this if you know the command you want to parse, you don't know which flags it responds to with help text, and
    you want to include subcommands.

    .. note::
        ``flags`` and ``try_subcommand_flags`` are retained only for backwards
        compatibility of the signature; they are not forwarded to the executor.
    """
    return executor.explore(cmd, max_depth=max_depth, parent=parent)


# __all__ must contain *names* (strings), not the objects themselves:
# ``from aclimatise import *`` raises TypeError on any non-string entry.
__all__ = [
    "CwlGenerator",
    "WdlGenerator",
    "YmlGenerator",
    "JanisGenerator",
    "LocalExecutor",
    "DockerExecutor",
    "ManPageExecutor",
    "explore_command",
    "parse_help",
]
"""
Code relating to the command line interface to aCLImatise
"""
import sys
from pathlib import Path
from typing import Iterable, Optional, Tuple

import click

from aclimatise import WrapperGenerator, explore_command, parse_help
from aclimatise.execution.local import LocalExecutor
from aclimatise.execution.man import ManPageExecutor
from aclimatise.flag_parser.parser import CliParser

# Some common options, shared by more than one subcommand
opt_generate_names = click.option(
    "--generate-names",
    "-g",
    is_flag=True,
    help=(
        "Rather than using the long flag to generate the argument name, generate them automatically using the "
        "flag description. Generally helpful if there are no long flags, only short flags."
    ),
)
opt_case = click.option(
    "--case",
    "-c",
    type=click.Choice(WrapperGenerator.cases),
    help=(
        "Which case to use for variable names. If not set, defaults to the language defaults: snake_case for CWL"
        " and snake_case for WDL"
    ),
    default="snake",
)
opt_cmd = click.argument("cmd", nargs=-1, required=True)


@click.group()
def main():
    """Entry point for the ``aclimatise`` command group."""
    pass


@main.command(help="Run an executable and explore all subcommands")
@opt_cmd
@opt_case
@opt_generate_names
@click.option(
    "--man",
    "-m",
    is_flag=True,
    help="Parse the help using its man page, rather than by executing the command. This will fail if the man page doesn't exist",
)
@click.option(
    "--depth",
    "-d",
    type=int,
    default=1,
    help="How many levels of subcommands we should look for. Depth 2 means commands can be 3 levels deep, such as "
    "``git submodule foreach``",
)
@click.option(
    "--format",
    "-f",
    "formats",
    type=click.Choice(["wdl", "cwl", "yml"]),
    multiple=True,
    default=("yml", "wdl", "cwl"),
    help="The language in which to output the CLI wrapper",
)
@click.option(
    "--out-dir",
    "-o",
    type=Path,
    help="Directory in which to put the output files",
    default=Path(),
)
@click.option(
    "--help-flag",
    "-l",
    type=str,
    help="Flag to append to the end of the command to make it output help text",
)
@click.option(
    "--subcommands/--no-subcommands", default=True, help="Look for subcommands"
)
def explore(
    cmd: Iterable[str],
    out_dir: Path,
    formats: Tuple[str],
    subcommands: bool,
    case: str,
    generate_names: bool,
    man: bool,
    help_flag: str,
    depth: Optional[int] = None,
):
    """
    Explore ``cmd`` (and optionally its subcommands) and write one wrapper
    file per command, in each requested format, into ``out_dir``.
    """
    # We only support these two executors via CLI because the docker executor
    # would require some additional config.
    # NOTE: local names renamed from ``exec``/``format`` — don't shadow builtins.
    if man:
        executor = ManPageExecutor()
    else:
        kwargs = {}
        if help_flag is not None:
            kwargs["flags"] = [[help_flag]]
        executor = LocalExecutor(**kwargs)

    if subcommands:
        command = executor.explore(list(cmd), max_depth=depth)
    else:
        command = executor.convert(list(cmd))

    for output_format in formats:
        converter_cls = WrapperGenerator.choose_converter(output_format)
        converter = converter_cls(
            generate_names=generate_names,
            case=case,
        )
        # generate_tree is a generator: consume it so the files get written
        list(converter.generate_tree(command, out_dir))


@main.command(
    help="Read a command help from stdin and output a tool definition to stdout"
)
@opt_cmd
@opt_generate_names
@opt_case
@click.option(
    "--format",
    "-f",
    type=click.Choice(["wdl", "cwl", "yml", "janis"]),
    default="cwl",
    help="The language in which to output the CLI wrapper",
)
def pipe(cmd, generate_names, case, format):
    """
    Parse help text read from stdin for ``cmd`` and print a single wrapper
    definition to stdout.

    (The ``format`` parameter intentionally shadows the builtin: click binds
    options to parameters by name, so renaming it would break the CLI.)
    """
    stdin = "".join(sys.stdin.readlines())
    command = parse_help(cmd, stdin)

    converter_cls = WrapperGenerator.choose_converter(format)
    converter = converter_cls(
        generate_names=generate_names,
        case=case,
    )
    output = converter.save_to_string(command)
    print(output)


@main.command(help="Output a representation of the internal grammar")
def railroad():
    """
    Write an HTML railroad diagram of the flag grammar to stdout.

    Exits with status 1 if the installed PyParsing lacks the diagram module.
    """
    try:
        from pyparsing.diagram import to_railroad, railroad_to_html

        parser = CliParser()
        railroad = to_railroad(parser.flags)
        sys.stdout.write(railroad_to_html(railroad))
    except ImportError:
        print(
            "You need PyParsing 3.0.0a2 or greater to use this feature", file=sys.stderr
        )
        sys.exit(1)


if __name__ == "__main__":
    main()
"""
Contains the objects that represent a "type" of data a flag argument might store
"""
import typing
from enum import Enum

import attr

from aclimatise.yaml import AttrYamlMixin


@attr.s(auto_attribs=True, frozen=True)
class CliType(AttrYamlMixin):
    """
    A data type used in the command-line
    """

    @staticmethod
    def lowest_common_type(types: typing.Iterable["CliType"]) -> "CliType":
        """
        Return a single type that every element of ``types`` can be
        represented as.

        :raises Exception: if the types include complex types (files, dirs,
            lists, dicts, tuples) that have no common representation
        """
        # Materialise first: ``types`` may be a one-shot generator, which the
        # set comprehension below would exhaust before we read it again.
        types = list(types)
        type_set: typing.Set[typing.Type[CliType]] = {type(t) for t in types}

        if len(type_set) == 1:
            # If there is only one type, use it
            return types[0]

        if len(type_set) == 2 and CliInteger in type_set and CliFloat in type_set:
            # If they're all numeric, they can be represented as floats
            return CliFloat()

        if {
            CliDir,
            CliDict,
            CliFile,
            CliTuple,
            CliList,
        } & type_set:
            # These complex types cannot be represented in a simpler way
            raise Exception(
                "There is no common type between {}".format(
                    ", ".join([str(typ) for typ in type_set])
                )
            )

        else:
            # Most of the time, strings can be used to represent primitive types
            return CliString()

    @property
    def representable(self) -> set:
        """
        Returns a set of types that this type could alternatively be represented as.
        Adds the class's own type to the _representable set
        """
        return self._representable.union({type(self)})

    # The list of types that this specific type could be representable as.
    # (Class-level and read-only by convention; never mutated in place.)
    _representable = set()


@attr.s(auto_attribs=True, frozen=True)
class CliEnum(CliType):
    """
    One of a list of possible options
    """

    enum: Enum
    """
    The possible options as a Python Enum
    """


@attr.s(auto_attribs=True, frozen=True)
class CliFloat(CliType):
    """
    Takes a floating-point value
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliInteger(CliType):
    """
    Takes an integer value
    """

    # An integer can always be losslessly widened to a float
    _representable = {CliFloat}


@attr.s(auto_attribs=True, frozen=True)
class CliString(CliType):
    """
    Takes a string value
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliBoolean(CliType):
    """
    Takes a boolean value
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliFileSystemType(CliType):
    """
    Takes a directory / file path
    """

    output: bool = False
    """
    Indicator if it is input or output
    """


@attr.s(auto_attribs=True, frozen=True)
class CliDir(CliFileSystemType):
    """
    Takes a directory path
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliFile(CliFileSystemType):
    """
    Takes a file path
    """

    pass


@attr.s(auto_attribs=True, frozen=True)
class CliDict(CliType):
    """
    Takes a dictionary value
    """

    key: CliType
    """
    Data type of the keys to this dictionary
    """

    value: CliType
    """
    Data type of the values to this dictionary
    """
@attr.s(auto_attribs=True, frozen=True) 156 | class CliList(CliType): 157 | """ 158 | Takes a list value 159 | """ 160 | 161 | value: CliType 162 | """ 163 | Data type of the values in this list 164 | """ 165 | 166 | 167 | @attr.s(auto_attribs=True, frozen=True) 168 | class CliTuple(CliType): 169 | """ 170 | Takes a list of values with a fixed length, possibly each with different types 171 | """ 172 | 173 | values: typing.List[CliType] 174 | """ 175 | List of types, in order, held within the tuple 176 | """ 177 | 178 | @property 179 | def homogenous(self): 180 | """ 181 | A tuple is homogenous if all types in the tuple are the same, aka the set of all types has length 1 182 | """ 183 | return len(set([type(x) for x in self.values])) == 1 184 | -------------------------------------------------------------------------------- /aclimatise/converter/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from itertools import groupby, zip_longest 3 | from os import PathLike 4 | from pathlib import Path 5 | from typing import Generator, Iterable, List, Set, TextIO, Tuple, Type 6 | 7 | import attr 8 | 9 | from aclimatise.model import CliArgument, Command, Flag 10 | from aclimatise.name_generation import ( 11 | NameGenerationError, 12 | choose_unique_name, 13 | generate_name, 14 | generate_names_nlp, 15 | generate_names_segment, 16 | name_to_camel, 17 | name_to_snake, 18 | ) 19 | from aclimatise.yaml import AttrYamlMixin 20 | 21 | 22 | @attr.s( 23 | auto_attribs=True, 24 | ) 25 | class NamedArgument(AttrYamlMixin): 26 | arg: CliArgument 27 | name: str 28 | 29 | 30 | @attr.s( 31 | auto_attribs=True, 32 | ) 33 | class WrapperGenerator(AttrYamlMixin): 34 | """ 35 | Abstract base class for a class that converts a Command object into a string that defines a tool 36 | wrapper in a certain workflow language 37 | """ 38 | 39 | cases = ["snake", "camel"] 40 | 41 | @classmethod 42 | def get_subclasses(cls) 
-> List[Type["WrapperGenerator"]]: 43 | return cls.__subclasses__() 44 | 45 | @classmethod 46 | def choose_converter(cls, typ) -> Type["WrapperGenerator"]: 47 | """ 48 | Returns a converter subclass, given a converter type name 49 | :param type: The type of converter, e.g. 'cwl' or 'wdl' 50 | """ 51 | for subclass in cls.__subclasses__(): 52 | if subclass.format() == typ: 53 | return subclass 54 | 55 | raise Exception("Unknown format type") 56 | 57 | @classmethod 58 | @abstractmethod 59 | def format(cls) -> str: 60 | """ 61 | Returns the output format that this generator produces as a string, e.g. "cwl" 62 | """ 63 | pass 64 | 65 | @abstractmethod 66 | def save_to_string(self, cmd: Command) -> str: 67 | """ 68 | Convert the command into a single string, ignoring subcommands 69 | """ 70 | pass 71 | 72 | def save_to_file(self, cmd: Command, path: Path) -> None: 73 | """ 74 | Write the command into a file 75 | """ 76 | # By default we just write the string out, but subclasses can have different behaviour 77 | path.write_text(self.save_to_string(cmd)) 78 | 79 | def generate_tree( 80 | self, cmd: Command, out_dir: PathLike 81 | ) -> Generator[Tuple[Path, Command], None, None]: 82 | out_dir = Path(out_dir) 83 | for cmd in cmd.command_tree(): 84 | path = out_dir / (cmd.as_filename + self.suffix) 85 | try: 86 | self.save_to_file(cmd, path) 87 | except NameGenerationError as e: 88 | raise NameGenerationError( 89 | 'Name generation error for command "{}". 
{}'.format( 90 | " ".join(cmd.command), e.message 91 | ) 92 | ) 93 | yield path, cmd 94 | 95 | @property 96 | def reserved(self) -> Set[Tuple[str, ...]]: 97 | """ 98 | A list of reserved keywords for this language 99 | """ 100 | return set() 101 | 102 | @property 103 | @abstractmethod 104 | def suffix(self) -> str: 105 | """ 106 | Returns a suffix for files generated using this converter 107 | """ 108 | 109 | def words_to_name(self, words: Iterable[str]): 110 | """ 111 | Converts a list of tokens, such as ["a", "variable", "name"] to a language-appropriate name, such as 112 | "aVariableName" 113 | """ 114 | if self.case == "snake": 115 | return name_to_snake(words) 116 | elif self.case == "camel": 117 | return name_to_camel(words) 118 | 119 | def choose_variable_names( 120 | self, flags: List[CliArgument], length: int = 3 121 | ) -> List[NamedArgument]: 122 | """ 123 | Choose names for a list of flags. This needs to be done in one go because there is a risk of duplicate 124 | variable names otherwise 125 | :param length: See :py:func:`from aclimatise.name_generation.generate_name` 126 | """ 127 | options = list( 128 | zip_longest( 129 | generate_names_segment([flag.full_name() for flag in flags]), 130 | generate_names_nlp( 131 | [flag.description for flag in flags], reserved=self.reserved 132 | ), 133 | [flag.argument_name() for flag in flags if isinstance(flag, Flag)], 134 | fillvalue=[], 135 | ) 136 | ) 137 | 138 | return [ 139 | NamedArgument( 140 | arg=flag, 141 | name=self.words_to_name( 142 | choose_unique_name(flag_options, reserved=self.reserved, number=i) 143 | ), 144 | ) 145 | for i, (flag, flag_options) in enumerate(zip(flags, options)) 146 | ] 147 | 148 | case: str = "snake" 149 | """ 150 | Which case to use for variable names 151 | """ 152 | 153 | generate_names: bool = True 154 | """ 155 | Rather than using the long flag to generate the argument name, generate them automatically using the 156 | flag description. 
Generally helpful if there are no long flags, only short flags. 157 | """ 158 | 159 | ignore_positionals: bool = False 160 | """ 161 | Don't include positional arguments, for example because the help formatting has some 162 | misleading sections that look like positional arguments 163 | """ 164 | -------------------------------------------------------------------------------- /aclimatise/converter/cwl.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | from pathlib import Path 3 | from typing import List 4 | 5 | import attr 6 | from cwl_utils.parser_v1_1 import ( 7 | CommandInputParameter, 8 | CommandLineBinding, 9 | CommandLineTool, 10 | CommandOutputBinding, 11 | CommandOutputParameter, 12 | DockerRequirement, 13 | ) 14 | 15 | from aclimatise import cli_types 16 | from aclimatise.cli_types import CliType 17 | from aclimatise.converter import NamedArgument, WrapperGenerator 18 | from aclimatise.model import CliArgument, Command, Flag, Positional 19 | from aclimatise.yaml import yaml 20 | 21 | 22 | @attr.s(auto_attribs=True) 23 | class CwlGenerator(WrapperGenerator): 24 | case = "snake" 25 | 26 | @classmethod 27 | def format(cls) -> str: 28 | return "cwl" 29 | 30 | @staticmethod 31 | def snake_case(words: list): 32 | return "_".join([word.lower() for word in words]) 33 | 34 | @staticmethod 35 | def type_to_cwl_type(typ: cli_types.CliType) -> str: 36 | """ 37 | Calculate the CWL type for a CLI type 38 | """ 39 | if isinstance(typ, cli_types.CliFile): 40 | return "File" 41 | elif isinstance(typ, cli_types.CliDir): 42 | return "Directory" 43 | elif isinstance(typ, cli_types.CliString): 44 | return "string" 45 | elif isinstance(typ, cli_types.CliFloat): 46 | return "double" 47 | elif isinstance(typ, cli_types.CliInteger): 48 | return "long" 49 | elif isinstance(typ, cli_types.CliBoolean): 50 | return "boolean" 51 | elif isinstance(typ, cli_types.CliEnum): 52 | return "string" 53 | elif isinstance(typ, 
    @staticmethod
    def arg_to_cwl_type(arg: CliArgument) -> str:
        """
        Calculate the CWL type for an entire argument

        :param arg: The argument (flag or positional) to type
        :return: A CWL type string, suffixed with "?" when the argument is optional
        """
        typ = arg.get_type()
        cwl_type = CwlGenerator.type_to_cwl_type(typ)

        # Arrays are not marked optional: an empty array already expresses "no value"
        if arg.optional and not cwl_type.endswith("[]"):
            return cwl_type + "?"
        else:
            return cwl_type

    def get_inputs(self, names: List[NamedArgument]) -> List[CommandInputParameter]:
        """
        Build one CWL input parameter per named argument.

        :param names: Arguments that already have unique variable names chosen
        :return: Input parameters; positionals bind by position, flags by prefix
        """
        ret = []
        for arg in names:
            assert arg.name != "", arg
            ret.append(
                CommandInputParameter(
                    # "in_" prefix keeps input ids distinct from the "out_" output ids
                    id="in_" + arg.name,
                    type=self.arg_to_cwl_type(arg.arg),
                    inputBinding=CommandLineBinding(
                        position=arg.arg.position
                        if isinstance(arg.arg, Positional)
                        else None,
                        prefix=arg.arg.longest_synonym
                        if isinstance(arg.arg, Flag)
                        else None,
                    ),
                    doc=arg.arg.description,
                )
            )

        return ret

    def get_outputs(self, names: List[NamedArgument]) -> List[CommandOutputParameter]:
        """
        Build the CWL outputs: stdout is always captured, plus one output per
        input argument whose type indicates it produces a file or directory.

        :param names: Arguments that already have unique variable names chosen
        """
        ret = [
            # We default to always capturing stdout
            CommandOutputParameter(
                id="out_stdout",
                type="stdout",
                doc="Standard output stream",
            )
        ]

        for arg in names:
            typ = arg.arg.get_type()
            # Only filesystem-typed arguments flagged as outputs become CWL outputs
            if isinstance(typ, cli_types.CliFileSystemType) and typ.output:
                ret.append(
                    CommandOutputParameter(
                        id="out_" + arg.name,
                        type=self.arg_to_cwl_type(arg.arg),
                        doc=arg.arg.description,
                        # The produced file's name is whatever value the
                        # corresponding input was given
                        outputBinding=CommandOutputBinding(
                            glob="$(inputs.in_{})".format(arg.name)
                        ),
                    )
                )
        return ret
inputs: List[CliArgument] = [*cmd.named] + ( 125 | [] if self.ignore_positionals else [*cmd.positional] 126 | ) 127 | names = self.choose_variable_names(inputs) 128 | 129 | hints = [] 130 | if cmd.docker_image is not None: 131 | hints.append(DockerRequirement(dockerPull=cmd.docker_image)) 132 | 133 | tool = CommandLineTool( 134 | id=cmd.as_filename + ".cwl", 135 | baseCommand=list(cmd.command), 136 | cwlVersion="v1.1", 137 | inputs=self.get_inputs(names), 138 | outputs=self.get_outputs(names), 139 | hints=hints, 140 | ) 141 | 142 | return tool 143 | 144 | @property 145 | def suffix(self) -> str: 146 | return ".cwl" 147 | 148 | def save_to_string(self, cmd: Command) -> str: 149 | io = StringIO() 150 | yaml.dump(self.command_to_tool(cmd).save(), io) 151 | return io.getvalue() 152 | 153 | def save_to_file(self, cmd: Command, path: Path) -> None: 154 | map = self.command_to_tool(cmd).save() 155 | with path.open("w") as fp: 156 | yaml.dump(map, fp) 157 | -------------------------------------------------------------------------------- /aclimatise/converter/janis.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import janis_core as janis 4 | from aclimatise import cli_types 5 | from aclimatise.cli_types import CliType 6 | from aclimatise.converter import NamedArgument, WrapperGenerator 7 | from aclimatise.model import CliArgument, Command, Flag, Positional 8 | 9 | 10 | class JanisGenerator(WrapperGenerator): 11 | @classmethod 12 | def format(cls) -> str: 13 | return "janis" 14 | 15 | def save_to_string(self, cmd: Command) -> str: 16 | 17 | clt = self.command_to_tool(cmd) 18 | return clt.translate("janis", to_console=False) 19 | 20 | def command_to_tool(self, cmd: Command) -> janis.CommandToolBuilder: 21 | 22 | inputs: List[CliArgument] = [*cmd.named] + ( 23 | [] if self.ignore_positionals else [*cmd.positional] 24 | ) 25 | names = self.choose_variable_names(inputs) 26 | 27 | tool = 
    def type_to_janis_type(
        self, typ: cli_types.CliType, optional: bool
    ) -> janis.DataType:
        """
        Map an aCLImatise CLI type onto the corresponding Janis data type.

        :param typ: The inferred CLI type of an argument
        :param optional: Whether the resulting Janis type should be optional
        :raises Exception: If ``typ`` is not a recognised CliType subclass
        """
        if isinstance(typ, cli_types.CliFile):
            return janis.File(optional=optional)
        elif isinstance(typ, cli_types.CliDir):
            return janis.Directory(optional=optional)
        elif isinstance(typ, cli_types.CliString):
            return janis.String(optional=optional)
        elif isinstance(typ, cli_types.CliFloat):
            return janis.Float(optional=optional)
        elif isinstance(typ, cli_types.CliInteger):
            return janis.Int(optional=optional)
        elif isinstance(typ, cli_types.CliBoolean):
            return janis.Boolean(optional=optional)
        elif isinstance(typ, cli_types.CliEnum):
            # Enums are represented as plain strings
            return janis.String(optional=optional)
        elif isinstance(typ, cli_types.CliList):
            # TODO: how is Array represented?
            # The element type itself is never optional, only the array as a whole
            inner = self.type_to_janis_type(typ.value, optional=False)
            return janis.Array(inner, optional=optional)

        elif isinstance(typ, cli_types.CliTuple):
            # Tuples collapse to the lowest common type of their members.
            # NOTE(review): the caller's ``optional`` flag is dropped here — confirm intended
            return self.type_to_janis_type(
                CliType.lowest_common_type(typ.values), optional=False
            )
        else:
            raise Exception(f"Invalid type {typ}!")

    def arg_to_janis_type(self, arg: CliArgument) -> janis.DataType:
        """Calculate the Janis data type for an entire argument."""
        return self.type_to_janis_type(arg.get_type(), arg.optional)

    def get_inputs(self, names: List[NamedArgument]) -> List[janis.ToolInput]:
        """
        Build one Janis ToolInput per named argument.

        :param names: Arguments that already have unique variable names chosen
        :return: Tool inputs; positionals bind by position, flags by prefix
        """
        ret = []
        for arg in names:
            assert arg.name != "", arg
            ret.append(
                janis.ToolInput(
                    # "in_" prefix keeps input tags distinct from the "out_" output tags
                    tag="in_" + arg.name,
                    input_type=self.arg_to_janis_type(arg.arg),
                    position=arg.arg.position
                    if isinstance(arg.arg, Positional)
                    else None,
                    prefix=arg.arg.longest_synonym
                    if isinstance(arg.arg, Flag)
                    else None,
                    doc=arg.arg.description,
                )
            )
        return ret

    def get_outputs(self, names: List[NamedArgument]) -> List[janis.ToolOutput]:
        """
        Build one Janis ToolOutput per input argument whose type indicates it
        produces a file or directory; the output is located via the matching input.
        """
        ret = []
        for arg in names:
            typ = arg.arg.get_type()
            if isinstance(typ, cli_types.CliFileSystemType) and typ.output:
                ret.append(
                    janis.ToolOutput(
                        tag="out_" + arg.name,
                        output_type=self.arg_to_janis_type(arg.arg),
                        doc=arg.arg.description,
                        # The produced file's name is whatever value the
                        # corresponding input was given
                        selector=janis.InputSelector("in_" + arg.name),
                    )
                )
        return ret

    @property
    def suffix(self) -> str:
        """File extension used for generated Janis wrappers."""
        return ".py"
#: A regex, borrowed from MiniWDL, that matches a valid WDL identifier
WDL_IDENT = re.compile(r"[a-zA-Z][a-zA-Z0-9_]*")
#: Matches all characters we should remove from a WDL identifier
WDL_STRIP = re.compile(r"(^[^a-zA-Z])|([^a-zA-Z0-9_])")


def escape_wdl_str(text: str):
    """
    Escape literal quotes in a Python string, to become suitable for WDL:
    double quotes are backslash-escaped and newlines become literal ``\\n``
    """
    return text.replace('"', '\\"').replace("\n", "\\n")


def flag_to_command_input(
    named_flag: NamedArgument, converter: WrapperGenerator
) -> Task.Command.CommandInput:
    """
    Convert a named argument into a wdlgen command input.

    Flags without an argument become WDL booleans (emit the flag when true);
    flags with an argument use their longest synonym as a prefix; positionals
    are emitted at their recorded position.

    :param named_flag: The argument plus its chosen variable name
    :param converter: The generator driving this conversion (currently unused)
    """
    args = dict(name=named_flag.name)

    if isinstance(named_flag.arg, model.Flag):
        args.update(dict(optional=named_flag.arg.optional))
        if isinstance(named_flag.arg.args, model.EmptyFlagArg):
            args.update(dict(true=named_flag.arg.longest_synonym, false=""))
        else:
            args.update(
                dict(
                    prefix=named_flag.arg.longest_synonym,
                )
            )
    # BUG FIX: this previously tested ``isinstance(named_flag, model.Positional)``,
    # but ``named_flag`` is always a NamedArgument wrapper, so the branch never
    # fired and positionals lost their position (and would have crashed on
    # ``named_flag.position`` if it had fired). Inspect the wrapped argument.
    elif isinstance(named_flag.arg, model.Positional):
        args.update(dict(optional=False, position=named_flag.arg.position))

    return Task.Command.CommandInput.from_fields(**args)
    def make_inputs(self, named: Iterable[NamedArgument]) -> List[Input]:
        """
        Build one WDL input declaration per named argument, typed according to
        the argument's inferred CLI type and optionality.

        :param named: Arguments that already have unique variable names chosen
        """
        return [
            Input(
                data_type=self.type_to_wdl(
                    named_arg.arg.get_type(), optional=named_arg.arg.optional
                ),
                name=named_arg.name,
            )
            for named_arg in named
        ]
inputs 131 | if isinstance(input.arg, Positional) 132 | ], 133 | arguments=[ 134 | flag_to_command_input(input, self) 135 | for input in inputs 136 | if isinstance(input.arg, Flag) 137 | ], 138 | ) 139 | 140 | def make_parameter_meta(self, named: Iterable[NamedArgument]) -> ParameterMeta: 141 | params = {} 142 | for named_arg in named: 143 | params[named_arg.name] = escape_wdl_str(named_arg.arg.description) 144 | 145 | return ParameterMeta(**params) 146 | 147 | def make_task_name(self, cmd: Command) -> str: 148 | return camelize( 149 | "_".join([WDL_STRIP.sub("", token) for token in cmd.command]).replace( 150 | "-", "_" 151 | ) 152 | ) 153 | 154 | def make_outputs(self, names: List[NamedArgument]) -> List[Output]: 155 | ret = [ 156 | # We default to always capturing stdout 157 | Output(data_type=File, name="out_stdout", expression="stdout()") 158 | ] 159 | for arg in names: 160 | typ = arg.arg.get_type() 161 | if isinstance(typ, cli_types.CliFileSystemType) and typ.output: 162 | ret.append( 163 | Output( 164 | data_type=self.type_to_wdl(typ), 165 | name="out_" + arg.name, 166 | expression='"${{in_{}}}"'.format(arg.name), 167 | ) 168 | ) 169 | 170 | return ret 171 | 172 | def save_to_string(self, cmd: Command) -> str: 173 | inputs: List[CliArgument] = [*cmd.named] + ( 174 | [] if self.ignore_positionals else [*cmd.positional] 175 | ) 176 | names = self.choose_variable_names(inputs) 177 | runtime = Task.Runtime() 178 | runtime.add_docker(cmd.docker_image) 179 | 180 | tool = Task( 181 | name=self.make_task_name(cmd), 182 | command=self.make_command(cmd, names), 183 | version="1.0", 184 | inputs=self.make_inputs(names), 185 | outputs=self.make_outputs(names), 186 | parameter_meta=self.make_parameter_meta(names), 187 | runtime=runtime, 188 | ) 189 | 190 | return tool.get_string() 191 | -------------------------------------------------------------------------------- /aclimatise/converter/yml.py: 
@attr.s(auto_attribs=True)
class YmlGenerator(WrapperGenerator):
    """
    Generator for aCLImatise's own internal YAML format: commands are dumped
    directly, with no conversion to another workflow language.
    """

    @property
    def suffix(self) -> str:
        """File extension used for generated YAML definitions."""
        return ".yml"

    def save_to_file(self, cmd: Command, path: Path) -> None:
        """Dump ``cmd`` as YAML into ``path``."""
        with path.open("w") as handle:
            yaml.dump(cmd, handle)

    def save_to_string(self, cmd: Command) -> str:
        """Dump ``cmd`` as YAML and return the resulting text."""
        sink = StringIO()
        yaml.dump(cmd, sink)
        return sink.getvalue()

    @classmethod
    def format(cls) -> str:
        """Identifier for this output format, as used by the CLI."""
        return "yml"
    def explore(
        self,
        command: List[str],
        max_depth: int = 2,
        parent: Optional[Command] = None,
    ) -> Command:
        """
        Given a command to start with, builds a model of this command and all its subcommands (if they exist)

        :param command: Tokens of the command to start from, e.g. ``["samtools"]``
        :param max_depth: How many levels of subcommands to recurse into
        :param parent: The parent Command, if this command is itself a subcommand
        """
        # If the executor doesn't implement a specific exploration technique, we just execute and ignore subcommands
        return self.convert(command)

    @abc.abstractmethod
    def convert(self, command: List[str]) -> Command:
        """
        Convert a single executable to a Command object, without considering subcommands

        :param command: Tokens of the command to run, e.g. ``["bwa", "mem"]``
        """
        pass
class DockerExecutor(CliHelpExecutor):
    """
    An executor that runs the commands on an already-running docker Container (not an Image!)
    """

    def __init__(
        self, container: "docker.models.containers.Container", save_image=True, **kwargs
    ):
        """
        :param container: The object from the Docker API that represents the running container to run inside
        :param save_image: If true (default), save the image name on the command, meaning that the resulting tool
        definitions also use this Docker image
        """
        super().__init__(**kwargs)
        self.container = container
        self.save_image = save_image

    def convert(
        self,
        cmd: List[str],
    ) -> Command:
        """
        Convert a command to a Command object, recording the container's first
        image tag on the result so generated wrappers reference the same image.
        """
        # Use the existing function, but patch in the docker image
        cmd = super().convert(cmd)
        if self.save_image:
            cmd.docker_image = self.container.image.tags[0]
        return cmd

    def execute(self, command: List[str]) -> str:
        """
        Run ``command`` inside the container over a raw exec socket and return
        its output (stdout if non-empty, otherwise stderr), or the timeout
        fallback from handle_timeout() if nothing arrives within self.timeout.
        """
        _, sock = self.container.exec_run(
            command, stdout=True, stderr=True, demux=True, socket=True
        )
        try:
            # These are timeouts that define how long to wait while nothing is being output
            sock._sock.settimeout(self.timeout)
            # Patch select() so reads cannot block forever.
            # NOTE(review): this assumes docker-py's frame reader goes through
            # select.select internally — confirm against the docker-py version in use
            with patch.object(
                select,
                "select",
                new=lambda rlist, wlist, xlist: original_select(
                    rlist, wlist, xlist, self.timeout
                ),
            ):
                stdout, stderr = read_socket(sock, timeout=self.timeout)
        except socket.timeout as e:
            return self.handle_timeout(e)

        return (stdout or stderr or b"").decode()
    def __init__(
        self,
        flags: Iterable[List[str]] = (["--help"], ["-h"], [], ["--usage"]),
        try_subcommand_flags=True,
        **kwargs
    ):
        """
        :param flags: Help-flag candidates to try, in order of preference; each
            entry is a list of tokens appended to the command, so ``[]`` means
            "run the command with no flag at all"
        :param try_subcommand_flags: If true, retry every help flag on each
            subcommand rather than reusing the flag that worked for the parent
        """
        super().__init__(**kwargs)
        self.flags = flags
        self.try_subcommand_flags = try_subcommand_flags
best.subcommands.append(subcommand) 73 | # If we had any subcommands then we probably don't have any positionals, or at least don't care about them 74 | best.positional = [] 75 | 76 | return best 77 | 78 | @abc.abstractmethod 79 | def execute(self, cmd: List[str]) -> str: 80 | """ 81 | Executes the provided command and returns a string containing the output 82 | """ 83 | pass 84 | 85 | def convert( 86 | self, 87 | cmd: List[str], 88 | ) -> Command: 89 | """ 90 | Determine the best Command instance for a given command line tool, by trying many 91 | different help flags, such as --help and -h, then return the Command. Use this if you know the command you want to 92 | parse, but you don't know which flags it responds to with help text. Unlike :py:func:`aclimatise.explore_command`, 93 | this doesn't even attempt to parse subcommands. 94 | 95 | :param cmd: The command to analyse, e.g. ['wc'] or ['bwa', 'mem'] 96 | :param flags: A list of help flags to try, e.g. ['--help', '-h'], in order how which one you would prefer to use. 97 | Generally [] aka no flags should be last 98 | :param executor: A class that provides the means to run a command. You can use the pre-made classes or write your own. 99 | """ 100 | # For each help flag, run the command and then try to parse it 101 | logger.info("Trying flags for {}".format(" ".join(cmd))) 102 | commands = [] 103 | for flag in self.flags: 104 | help_cmd = cmd + flag 105 | logger.info("Trying {}".format(" ".join(help_cmd))) 106 | try: 107 | final = self.execute(help_cmd) 108 | result = parse_help(cmd, final, max_length=self.max_length) 109 | result.generated_using = flag 110 | commands.append(result) 111 | except (ParseBaseException, UnicodeDecodeError) as e: 112 | # If parsing fails, this wasn't the right flag to use 113 | continue 114 | 115 | # Sort by flags primarily, and if they're equal, return the command with the longest help text, and if they're equal 116 | # return the command with the most help flags. 
def kill_proc_tree(pid, sig=signal.SIGTERM, include_parent=True):
    """
    Kill a process tree (including grandchildren) by sending signal ``sig`` to
    every process in it.

    Adapted from https://psutil.readthedocs.io/en/latest/#kill-process-tree

    :param pid: PID of the root process; must not be the current process
    :param sig: Signal to send to each process
    :param include_parent: If true, also signal the root process itself
    """
    assert pid != os.getpid(), "won't kill myself"
    parent = psutil.Process(pid)
    children = parent.children(recursive=True)
    if include_parent:
        children.append(parent)
    for p in children:
        p.send_signal(sig)


class LocalExecutor(CliHelpExecutor):
    """
    Executor that runs commands as local subprocesses, with stdin attached to a
    pseudo-TTY so tools that expect a terminal still print their help.
    """

    def __init__(self, popen_args: dict = None, **kwargs):
        """
        :param popen_args: Extra keyword arguments forwarded to subprocess.Popen
        """
        super().__init__(**kwargs)
        # Avoid a mutable default argument (a dict shared across all instances)
        self.popen_args = {} if popen_args is None else popen_args

    def execute(self, command: List[str]) -> str:
        """
        Run ``command`` and return its stdout (or stderr if stdout is empty),
        or the handle_timeout() fallback if it outlives ``self.timeout``.
        """
        master, slave = pty.openpty()
        popen_kwargs = dict(
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=slave,
            encoding="utf-8",
        )
        popen_kwargs.update(self.popen_args)

        # This works a lot like subprocess.run, but we need access to the pid in order to kill the process tree, so use Popen
        with subprocess.Popen(command, **popen_kwargs) as process:
            try:
                stdout, stderr = process.communicate(timeout=self.timeout)
            except subprocess.TimeoutExpired as e:
                # Kill the entire process tree, because sometimes killing the parent isn't enough
                # NOTE(review): on non-linux platforms this passes sig=None,
                # which psutil's send_signal will reject — confirm intended platforms
                kill_proc_tree(
                    process.pid,
                    include_parent=True,
                    sig=signal.SIGKILL if sys.platform == "linux" else None,
                )
                process.communicate()
                return self.handle_timeout(e)
            finally:
                os.close(master)
                os.close(slave)

        return stdout or stderr
    def execute_with_sep(self, command: List[str], separator: str = "-") -> str:
        """
        Returns the man page text for the provided command, using the provided subcommand separator, or an empty string
        if this man page doesn't exist

        :param command: Command tokens, e.g. ``["git", "branch"]``
        :param separator: Character used to join the tokens into a man page name
        """
        env = {**os.environ.copy(), "MANPAGER": "cat"}  # Don't use a pager
        if len(self.man_paths) > 0:
            env.update({"MANPATH": ":".join(self.man_paths)})

        sub_man = separator.join(command)
        result = subprocess.run(
            ["man", *self.man_flags, sub_man],
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # A nonzero exit code means the page doesn't exist
        if result.returncode == 0:
            return result.stdout.decode()

        return ""

    def convert(self, command: List[str]) -> Command:
        """
        Parse the man page for ``command`` into a Command. For subcommands,
        every configured separator is tried and the best-scoring parse is kept.
        """
        if len(command) == 1:
            return parse_help(
                command, self.execute_with_sep(command), max_length=self.max_length
            )
        else:
            commands = []
            for sep in self.subcommand_sep:
                man_text = self.execute_with_sep(command, sep)
                commands.append(
                    parse_help(command, man_text, max_length=self.max_length)
                )
            return Command.best(commands)
https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/aclimatise/flag_parser/__init__.py -------------------------------------------------------------------------------- /aclimatise/flag_parser/elements.py: -------------------------------------------------------------------------------- 1 | """ 2 | Re-usable parser elements that aren't tied to the parser object 3 | """ 4 | from typing import List 5 | 6 | from pyparsing import * 7 | 8 | from aclimatise.model import * 9 | 10 | #: Characters that delimit flag synonyms 11 | synonym_delim_chars = ",|/" 12 | #: Characters that can start a CLI element, e.g. "-@" 13 | element_start_chars = alphanums + "@" 14 | #: Characters that can be in the middle of a CLI element, e.g. "-some-arg" 15 | element_body_chars = element_start_chars + "-_." 16 | #: Characters that can only be used in arguments for flags e.g. "" 17 | argument_body_chars = element_body_chars + "|" 18 | #: Characters that can be in the middle of an argument that has brackets around it, e.g. "-arg " 19 | delimited_body_chars = argument_body_chars + r" \/" 20 | 21 | NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()).setName("Newline") 22 | 23 | 24 | cli_id = Word(initChars=element_start_chars, bodyChars=element_body_chars) 25 | 26 | positional_name = Word( 27 | initChars=element_start_chars, bodyChars=element_body_chars, min=2 28 | ) 29 | 30 | # short_flag = originalTextFor(Literal('-') + Word(alphanums + '@', max=1)) 31 | # """A short flag has only a single dash and single character, e.g. `-m`""" 32 | # long_flag = originalTextFor(Literal('--') + cli_id) 33 | # """A long flag has two dashes and any amount of characters, e.g. `--max-count`""" 34 | any_flag = ( 35 | originalTextFor("-" + Optional("-") + cli_id).leaveWhitespace().setName("Flag") 36 | ) 37 | """The flag is the part with the dashes, e.g. 
def visit_optional_args(s, lok, toks):
    """
    Parse action for ``optional_args``: folds nested bracketed arguments such
    as ``FLOAT[,FLOAT[,INT]]`` into a single OptionalFlagArg listing every name.
    """
    if len(toks) == 1:
        return OptionalFlagArg(names=[toks[0]])
    else:
        # toks is: name, "[", separator (the comma), inner (str or OptionalFlagArg), "]"
        first, _, sep, second, _ = toks
        if isinstance(second, str):
            return OptionalFlagArg(names=[first, second], separator=sep)
        elif isinstance(second, OptionalFlagArg):
            # Flatten the recursively-parsed inner args into one node
            return OptionalFlagArg(names=[first] + second.names, separator=sep)
98 | 99 | list_type_arg = ( 100 | ( 101 | (arg + repeated_segment) 102 | ^ (arg + Literal("[").suppress() + repeated_segment + Literal("]").suppress()) 103 | ) 104 | .setParseAction(lambda s, loc, toks: toks[1]) 105 | .setName("repeated_arg") 106 | ) 107 | """ 108 | When the argument is an array of values, e.g. when the help says `--samout SAMOUTS [SAMOUTS ...]` or 109 | `-i FILE1 FILE2 .. FILEn` 110 | 111 | """ 112 | 113 | choice_type_arg = ( 114 | nestedExpr(opener="{", closer="}", content=delimitedList(cli_id, delim=",")) 115 | .setParseAction(lambda s, loc, toks: ChoiceFlagArg(set(toks[0]))) 116 | .setName("ChoiceArg") 117 | ) 118 | """When the argument is one from a list of values, e.g. when the help says `--format {sam,bam}`""" 119 | 120 | 121 | def noop(s, loc, toks): 122 | return toks 123 | 124 | 125 | arg_expression = ( 126 | ( 127 | flag_arg_sep.suppress() 128 | + (list_type_arg | choice_type_arg | optional_args | simple_arg) 129 | ) 130 | # .leaveWhitespace() 131 | .setParseAction(lambda s, loc, toks: toks[0]) 132 | ) 133 | arg_expression.skipWhitespace = False 134 | """An argument with separator, e.g. `=FILE`""" 135 | 136 | flag_with_arg = ( 137 | (any_flag + Optional(arg_expression)) 138 | .setParseAction( 139 | lambda s, loc, toks: ( 140 | FlagSynonym( 141 | name=toks[0], argtype=toks[1] if len(toks) > 1 else EmptyFlagArg() 142 | ) 143 | ) 144 | ) 145 | .setName("FlagWithArg") 146 | ) 147 | flag_with_arg.skipWhitespace = True 148 | """e.g. `--max-count=NUM`""" 149 | 150 | synonym_delim = ( 151 | White() ^ (Optional(White()) + Char(synonym_delim_chars) + Optional(White())) 152 | ).leaveWhitespace() 153 | """ 154 | The character used to separate synonyms of a flag. Depending on the help text this might be a comma, pipe or space 155 | """ 156 | 157 | description_sep = White(min=1).suppress() 158 | """ 159 | The section that separates a flag from its description. 
# block_element_prefix = LineStart().leaveWhitespace()
# A flag entry must begin either at the start of a line or after a colon,
# followed by at least one whitespace character; the prefix itself is discarded
block_element_prefix = (
    ((LineStart().leaveWhitespace() ^ Literal(":")) + White(min=1))
    .setName("block_element_prefix")
    .leaveWhitespace()
    .suppress()
)
"""
Each element (e.g. flag) in a list of flags must either start with a colon or nothing

e.g. in this example "index" is prefixed by a colon and "mem" is prefixed by a LineStart

Command: index         index sequences in the FASTA format
         mem           BWA-MEM algorithm
"""

# One or more flag synonyms separated by synonym_delim
flag_synonyms = delimitedList(flag_with_arg, delim=synonym_delim).setName(
    "FlagSynonyms"
)
"""
When the help lists multiple synonyms for a flag, e.g:
    -n, --lines=NUM
"""
def parse_help(
    cmd: typing.Collection[str], text: str, max_length: int = 1000
) -> Command:
    """
    Parse a string of help text into a Command. Use this if you already have run the executable and extracted the
    help text yourself

    :param cmd: List of arguments used to generate this help text, e.g. ['bwa', 'mem']
    :param text: The help text to parse
    :param max_length: If the input text has more than this many lines, no attempt will be made to parse the file (as
        it's too large, will likely take a long time, and there's probably an underlying problem if this has happened).
        In this case, an empty Command will be returned
    """
    # Bail out early on huge inputs, returning an empty Command for this cmd
    if len(text.splitlines()) > max_length:
        return Command(list(cmd))

    # Parse the same text twice: once scanning for flag-description lists,
    # and once scanning for the "usage:" block
    help_command = CliParser().parse_command(name=cmd, cmd=text)
    usage_command = UsageParser().parse_usage(list(cmd), text)

    # Combine the two commands by picking from the help_command where possible, otherwise falling back on the usage
    fields = dict(
        help_text=text,
        # Use the help command's positionals preferentially, but fall back to usage
        positional=help_command.positional or usage_command.positional,
        # Combine the flags from both help and usage
        named=list(Flag.combine([help_command.named, usage_command.named])),
    )
    # For every remaining Command attribute, prefer the explicitly computed
    # value, then the help-derived value, then the usage-derived value.
    # Note that `or` also skips falsy-but-present values such as [] or ""
    for field in attr.fields(Command):
        fields[field.name] = (
            fields.get(field.name)
            or getattr(help_command, field.name)
            or getattr(usage_command, field.name)
        )

    return Command(**fields)
def is_sentence(text: str, threshold: float = 0.8) -> bool:
    """
    Returns a bool that indicates if this text is likely a sentence. This should probably be replaced by a machine
    learning classifier in the future
    :param text: The text to classify
    :param threshold: If the ratio of non-word tokens over word tokens is higher than this, then return False
    """

    # Use the pipeline with sentence-boundary detection disabled, so the
    # whole input is treated as (at most) one sentence
    doc = no_sentences(text)
    sents = list(doc.sents)

    # No tokens at all, so it definitely isn't a sentence
    if len(sents) == 0:
        return False

    # Only the first (and, given no_sentences, only) sentence is considered
    sentence = sents[0]
    non_word_count = 0
    word_count = 0
    for tok in sentence:
        pos = tok.pos_
        if pos == "SPACE":
            # Ignore whitespace
            continue

        if pos in {"X", "SYM", "PUNCT", "NUM"}:
            non_word_count += 1
        # NOTE(review): word_count is incremented for every non-space token,
        # so the ratio below is non-word tokens over *all* tokens, not over
        # word tokens as the docstring suggests — confirm which is intended
        word_count += 1

    # Empty-after-filtering text is vacuously a sentence
    result = word_count == 0 or non_word_count / word_count < threshold
    return result
class IndentParserMixin:
    """
    A mixin that maintains an indent stack, and utility methods that produce
    pyparsing elements which check or update that stack during a parse
    """

    def __init__(self):
        # 1-based column numbers of each currently-open indentation level
        self.stack = [1]

    def pop_indent(self):
        """Returns an element that unconditionally pops one indent level"""

        def check_indent(s, l, t):
            self.stack.pop()

        return (Empty() + Empty()).setParseAction(check_indent).setName("Pop")

    def push_indent(self):
        """Returns an element that pushes the current column as a new indent level"""

        def check_indent(s, l, t):
            curCol = col(l, s)
            self.stack.append(curCol)

        return (Empty() + Empty()).setParseAction(check_indent).setName("Push")

    def peer_indent(self, allow_greater=False):
        """
        Returns an element that matches only at the same indentation level as the
        top of the stack

        :param allow_greater: Allow greater indent than the previous indentation, but don't add it to the stack
        """

        def check_peer_indent(s, l, t):
            if l >= len(s):
                # At end of input there is nothing left to indent-check
                return
            curCol = col(l, s)
            if allow_greater and curCol >= self.stack[-1]:
                return
            elif curCol == self.stack[-1]:
                return
            else:
                if curCol > self.stack[-1]:
                    raise ParseException(s, l, "illegal nesting")
                raise ParseException(s, l, "not a peer entry")

        return Empty().setParseAction(check_peer_indent).setName("Peer")

    def indent(self, update=True):
        """
        Returns an element that matches only at a deeper indentation than the top
        of the stack

        :param update: If true, update the stack, otherwise simply check for an indent
        """

        def check_sub_indent(s, l, t):
            curCol = col(l, s)
            if curCol > self.stack[-1]:
                # Deeper indent found; record it only when update is requested
                if update:
                    self.stack.append(curCol)
            else:
                raise ParseException(s, l, "not a subentry")

        return (Empty() + Empty().setParseAction(check_sub_indent)).setName("Indent")

    def dedent(self, precise=True):
        """
        Returns an element that matches a reduction in indentation, popping the
        stack when the column drops below the current level

        :param precise: If true, the new column must be an indent level seen before
        """

        def check_dedent(s, l, t):
            if l >= len(s):
                return
            curCol = col(l, s)
            if precise and self.stack and curCol not in self.stack:
                raise ParseException(s, l, "not an unindent")
            if curCol < self.stack[-1]:
                self.stack.pop()

        return Empty().setParseAction(check_dedent).setName("Unindent")


# Fix: __all__ entries must be strings, not the objects themselves.
# With class objects listed here, `from aclimatise.parser import *` raises
# "TypeError: Item in __all__ must be str".
__all__ = ["IndentCheckpoint", "IndentParserMixin"]
def delimited_item(open, el, close):
    """Wraps `el` in the given opening/closing delimiters and strips them from the tokens"""

    def action(s, loc, toks):
        # Drop the opening and closing delimiter tokens.
        # NOTE(review): the downstream parse actions below index toks as if the
        # delimiters were still present — verify how these actions chain
        return toks[1:-1]

    return (open + el + close).setParseAction(action)


# Forward-declared; defined at the bottom of this module once all the
# alternatives it dispatches between exist
usage_element = Forward()
element_char = arg.copy()  # Word(initChars=element_start_chars, bodyChars=)

mandatory_element = (
    element_char.copy()
    .setParseAction(
        lambda s, loc, toks: UsageElement(
            text=toks[0],
        )
    )
    .setName("MandatoryElement")
)
"""
A mandatory element in the command-line invocation. Might be a variable or a constant
"""

variable_element = (
    delimited_item(
        "<", Word(initChars=element_start_chars, bodyChars=delimited_body_chars), ">"
    )
    .setParseAction(lambda s, loc, toks: UsageElement(text=toks[1], variable=True))
    .setName("VariableElement")
)
"""
Any element inside angle brackets is a variable, meaning you are supposed to provide your own value for it.
However, some usage formats show variables without the angle brackets
"""


def visit_optional_section(s, loc, toks):
    # Everything between the square brackets is optional; mark each element
    inner = toks[1:-1]
    for tok in inner:
        tok.optional = True
    return inner


optional_section = (
    delimited_item("[", OneOrMore(usage_element), "]")
    .setParseAction(visit_optional_section)
    .setName("OptionalSection")
)
"""
Anything can be nested within square brackets, which indicates that everything there is optional
"""
"n" for a "-n" flag 89 | """ 90 | 91 | # short_flag = ( 92 | # '-' + short_flag_name + White() + Optional(flag_arg) 93 | # ).setParseAction( 94 | # lambda s, loc, toks: 95 | # Flag.from_synonyms([FlagSynonym( 96 | # name=toks[0] + toks[1], 97 | # argtype=SimpleFlagArg(toks[3]) if toks[3] else EmptyFlagArg() 98 | # )], description=None) 99 | # ) 100 | """ 101 | The usage can contain a flag with its argument 102 | """ 103 | 104 | # long_flag = ( 105 | # '--' + element_char + White() + Optional(flag_arg) 106 | # ).setParseAction(lambda s, loc, toks: Flag.from_synonyms([FlagSynonym( 107 | # name=toks[1], 108 | # argtype=SimpleFlagArg(toks[3]) if toks[3] else EmptyFlagArg() 109 | # )])) 110 | """ 111 | The usage can contain a flag with its argument 112 | """ 113 | 114 | 115 | def visit_short_flag_list(s, loc, toks): 116 | return [ 117 | Flag.from_synonyms( 118 | [FlagSynonym(name="-" + flag, argtype=EmptyFlagArg())], description=None 119 | ) 120 | for flag in toks[1:] 121 | ] 122 | 123 | 124 | # short_flag_list = ('-' + short_flag_name + OneOrMore(short_flag_name)).setParseAction( 125 | # visit_short_flag_list).leaveWhitespace() 126 | """ 127 | Used to illustrate where a list of short flags could be used, e.g. 
def visit_list_element(s, loc, toks):
    # Pick the last element if there is one, otherwise use the first element
    # This gives us a better name like 'inN.bam' instead of 'in2.bam'
    els = [tok for tok in toks if isinstance(tok, (UsageElement, Flag))]
    for el in els:
        el.repeatable = True
    return els[-1]


# Matches a literal "option"/"options" placeholder, which carries no
# information of its own and is therefore suppressed
options_placeholder = (
    Regex("options?", flags=re.IGNORECASE).suppress().setName("OptionsPlaceholder")
)

list_element = (
    (
        OneOrMore(options_placeholder ^ mandatory_element ^ variable_element)
        + Literal(".")[2, 3]
        + Optional(options_placeholder ^ mandatory_element ^ variable_element)
    )
    .setParseAction(visit_list_element)
    .setName("list_element")
)
"""
When one or more arguments are allowed, e.g. " ... "
"""

# A flag as it appears in a usage line, promoted to a full Flag object with an
# empty description
usage_flag = (
    And([flag_with_arg])
    .setParseAction(lambda s, loc, toks: Flag.from_synonyms(toks, description=""))
    .setName("usage_flag")
)


# The alternatives are ordered so that structured forms (optional sections,
# lists, flags) are preferred over a bare mandatory element
usage_element <<= Or(
    [
        optional_section,
        list_element,
        # short_flag_list,
        usage_flag,
        variable_element,
        options_placeholder,
        mandatory_element,
    ]
).setName("usage_element")

# NOTE(review): module-level indent stack; only used by visit_usage below,
# which itself is only referenced from commented-out code
stack = [1]


def visit_usage(s, loc, toks):
    # Fix up stack inconsistencies
    while len(stack) > 1:
        stack.pop()

    return toks[0][0]


usage_example = OneOrMore(usage_element, stopOn=LineEnd())
"""
Each usage example is a single line of text, e.g.

    shell [options] -e string
"""

# A "usage:" header followed by one or more usage examples
usage = (
    LineStart()
    + Regex("usage:", flags=re.IGNORECASE).suppress()
    + OneOrMore(usage_example)
)  # .setParseAction(visit_usage).setDebug()
201 | 202 | Usage: 203 | shell [options] -e string 204 | execute string in V8 205 | shell [options] file1 file2 ... filek 206 | run JavaScript scripts in file1, file2, ..., filek 207 | """ 208 | 209 | 210 | # usage = Regex('usage:', flags=re.IGNORECASE).suppress() + delimitedList(usage_element, delim=Or([' ', '\n'])) 211 | # indentedBlock( 212 | # delimitedList(usage_element, delim=' '), 213 | # indentStack=stack, 214 | # indent=True 215 | # ) 216 | -------------------------------------------------------------------------------- /aclimatise/usage_parser/model.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | import attr 4 | 5 | from aclimatise import model 6 | from aclimatise.yaml import AttrYamlMixin 7 | 8 | 9 | @attr.s(auto_attribs=True) 10 | class UsageElement(AttrYamlMixin): 11 | text: str 12 | """ 13 | The name of this element, as defined in the usage section 14 | """ 15 | 16 | optional: bool = False 17 | """ 18 | Whether or not this element is required 19 | """ 20 | 21 | variable: bool = False 22 | """ 23 | True if this is a variable, ie you are supposed to replace this text with your own, False if this is a constant 24 | that you shouldn't change, e.g. 
def normalise_cline(tokens):
    """
    Normalise a tokenised command line, such as ["dotnet", "Pisces.dll"],
    converting it to ["dotnet", "pisces"]: each token is lowercased and any
    file extension is stripped.

    :param tokens: iterable of command-line tokens
    :return: list of normalised tokens
    """
    normalised = []
    for token in tokens:
        normalised.append(Path(token.lower()).stem)
    return normalised
    def parse_usage(self, cmd: List[str], usage: str, debug: bool = False) -> Command:
        """
        Parse the usage block (if any) out of a help string into a Command

        :param cmd: The tokenised command used to produce the help, e.g. ['bwa', 'mem']
        :param usage: The full help text to scan for a "usage:" section
        :param debug: If true, enable pyparsing debug output for the usage grammar
        """
        # return self.usage.searchString(usage)
        usage_blocks = self.usage.setDebug(debug).searchString(usage)
        if not usage_blocks:
            # If we had no results, return an empty command
            return Command(command=cmd)

        instances = []
        all_positionals = []
        all_flags = []
        for block in usage_blocks:
            for instance in block:

                # Split each usage instance into positional elements and flags
                positional = [
                    tok for tok in instance.items if isinstance(tok, UsageElement)
                ]
                flags = [tok for tok in instance.items if isinstance(tok, Flag)]

                # Remove an "options" argument which is just a proxy for other flags
                # positional = [pos for pos in positional if pos.text.lower() != "options"]
                # The usage often starts with a re-iteration of the command name itself. Remove this if present
                for i in range(len(positional)):
                    # For each positional argument, if the entire cmd string is present, slice away this and everything before it.
                    # (`positional` shrinks inside the loop; the `end <= len(positional)`
                    # guard keeps later iterations from matching out of range)
                    end = i + len(cmd)
                    if end <= len(positional) and normalise_cline(
                        [pos.text for pos in positional[i:end]]
                    ) == normalise_cline(cmd):
                        positional = positional[end:]

                if not any([tok for tok in positional if tok.variable]):
                    # If the usage didn't explicitly mark anything as a variable using < > brackets, we have to assume that
                    # everything other than flags are positional elements
                    for element in positional:
                        element.variable = True

                instances.append(instance)
                # Convert these UsageElements into Positionals
                all_positionals += [
                    Positional(
                        description="", position=i, name=el.text, optional=el.optional
                    )
                    for i, el in enumerate(positional)
                ]
                all_flags += flags

        # Merge duplicates produced by multiple usage examples
        return Command(
            command=cmd,
            positional=Positional.deduplicate(all_positionals),
            named=Flag.deduplicate(all_flags),
        )
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- Project information -----------------------------------------------------

project = "aCLImatise"
copyright = "2020, Michael Milton"
author = "Michael Milton"

# The full version, including alpha/beta/rc tags.
# Fix: kept in sync with `version = 3.0.1` in setup.cfg; the previous value
# "0.0.16" had drifted from the released package version.
release = "3.0.1"


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx_click.ext",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "alabaster"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
Installation
============

To install ``aCLImatise``, run:

.. code-block:: bash

    pip install aclimatise
    python -m spacy download en_core_web_sm # Install the internal language model

Now you can use either the :doc:`Python API <api>` or the :doc:`CLI <cli>`.
11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/model.rst: -------------------------------------------------------------------------------- 1 | Data Model 2 | ========== 3 | 4 | Command 5 | ------- 6 | .. autoclass:: aclimatise.model.Command 7 | :members: 8 | 9 | Command Inputs 10 | -------------- 11 | .. autoclass:: aclimatise.model.CliArgument 12 | :members: 13 | .. autoclass:: aclimatise.model.Positional 14 | :members: 15 | .. autoclass:: aclimatise.model.Flag 16 | :members: 17 | .. autoclass:: aclimatise.model.FlagSynonym 18 | :members: 19 | 20 | Flag Arguments 21 | -------------- 22 | .. autoclass:: aclimatise.model.FlagArg 23 | :members: 24 | .. autoclass:: aclimatise.model.EmptyFlagArg 25 | :members: 26 | .. autoclass:: aclimatise.model.OptionalFlagArg 27 | :members: 28 | .. autoclass:: aclimatise.model.SimpleFlagArg 29 | :members: 30 | .. autoclass:: aclimatise.model.RepeatFlagArg 31 | :members: 32 | .. autoclass:: aclimatise.model.ChoiceFlagArg 33 | :members: 34 | 35 | Argument Types 36 | -------------- 37 | .. 
automodule:: aclimatise.cli_types 38 | :members: 39 | :undoc-members: 40 | :show-inheritance: 41 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: aclimatise-test 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - python>=3.7.5 7 | - bwa==0.7.17 8 | - samtools=1.9 9 | - bedtools==2.26.0 10 | - htseq==0.12.4 11 | - dinosaur==1.1.3 12 | - pisces==5.2.9.122 13 | - genomethreader==1.7.1 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = aclimatise 3 | version = 3.0.1 4 | description = aCLImatise is a Python library and command-line utility for parsing the help output of a command-line tool and then outputting a description of the tool in a more structured format 5 | long_description = file: README.rst 6 | long_description_content_type: text/x-rst 7 | license = GPLv3 8 | classifiers = 9 | License :: OSI Approved :: GNU General Public License v3 (GPLv3) 10 | Programming Language :: Python :: 3 11 | Programming Language :: Python :: 3.7 12 | Programming Language :: Python :: 3.8 13 | Intended Audience :: Developers 14 | Natural Language :: English 15 | 16 | [tool:pytest] 17 | log_level = INFO 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from setuptools import find_packages, setup 3 | 4 | setup( 5 | packages=find_packages(exclude="test"), 6 | install_requires=[ 7 | "pyparsing", 8 | "jinja2", 9 | "spacy~=3.0", 10 | "miniwdl", 11 | "wordsegment", 12 | "inflection", 13 | "illusional.wdlgen==0.3.0", 14 | "ruamel.yaml==0.16.5", 15 | "click", 16 | "cwltool", 17 | "cwl-utils>=0.4", 18 | "regex", 19 | 
"num2words", 20 | "word2number", 21 | "psutil", 22 | "deprecated", 23 | "attrs", 24 | "janis-pipelines.core>=0.11.2", 25 | ], 26 | python_requires=">=3.6", 27 | entry_points={"console_scripts": ["aclimatise = aclimatise.cli:main"]}, 28 | extras_require={ 29 | "dev": [ 30 | "pytest", 31 | "pre-commit", 32 | "Sphinx", 33 | "sphinx-click", 34 | "pytest-timeout", 35 | "docker", 36 | ], 37 | }, 38 | ) 39 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/__init__.py -------------------------------------------------------------------------------- /test/conftest.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | import pytest 4 | from pkg_resources import resource_filename 5 | 6 | from aclimatise.converter.yml import YmlGenerator 7 | from aclimatise.execution.local import LocalExecutor 8 | from aclimatise.flag_parser.parser import CliParser 9 | from aclimatise.usage_parser.parser import UsageParser 10 | from aclimatise.yaml import yaml 11 | 12 | 13 | @pytest.fixture() 14 | def usage_parser(): 15 | return UsageParser() 16 | 17 | 18 | @pytest.fixture() 19 | def local_executor(): 20 | return LocalExecutor() 21 | 22 | 23 | @pytest.fixture() 24 | def yaml_converter(): 25 | return YmlGenerator() 26 | 27 | 28 | @pytest.fixture() 29 | def bedtools_cmd(): 30 | with open(resource_filename(__name__, "test_data/bedtools/bedtools.yml")) as fp: 31 | return yaml.load(fp) 32 | 33 | 34 | @pytest.fixture() 35 | def samtools_cmd(): 36 | with open(resource_filename(__name__, "test_data/samtools/samtools.yml")) as fp: 37 | return yaml.load(fp) 38 | 39 | 40 | @pytest.fixture 41 | def samtools_help(): 42 | with open(resource_filename(__name__, "test_data/samtools.txt")) as fp: 43 | return 
fp.read() 44 | 45 | 46 | @pytest.fixture 47 | def htseq_help(): 48 | with open(resource_filename(__name__, "test_data/htseq_count.txt")) as fp: 49 | return fp.read() 50 | 51 | 52 | @pytest.fixture 53 | def bwamem_help(): 54 | with open(resource_filename(__name__, "test_data/bwa_mem.txt")) as fp: 55 | return fp.read() 56 | 57 | 58 | @pytest.fixture 59 | def pisces_help(): 60 | with open(resource_filename(__name__, "test_data/pisces.txt")) as fp: 61 | return fp.read() 62 | 63 | 64 | @pytest.fixture 65 | def bwa_help(): 66 | with open(resource_filename(__name__, "test_data/bwa.txt")) as fp: 67 | return fp.read() 68 | 69 | 70 | @pytest.fixture 71 | def bwa_bwt2sa_help(): 72 | with open(resource_filename(__name__, "test_data/bwa_bwt2sa.txt")) as fp: 73 | return fp.read() 74 | 75 | 76 | @pytest.fixture 77 | def bedtools_help(): 78 | with open(resource_filename(__name__, "test_data/bedtools.txt")) as fp: 79 | return fp.read() 80 | 81 | 82 | @pytest.fixture 83 | def bedtools_coverage_help(): 84 | with open(resource_filename(__name__, "test_data/bedtools_coverage.txt")) as fp: 85 | return fp.read() 86 | 87 | 88 | @pytest.fixture 89 | def podchecker_help(): 90 | with open(resource_filename(__name__, "test_data/podchecker.txt")) as fp: 91 | return fp.read() 92 | 93 | 94 | @pytest.fixture() 95 | def process(): 96 | def process_help_section(help): 97 | """ 98 | Does some preprocessing on a help text segment to facilitate testing 99 | """ 100 | help = help.strip("\n") 101 | return dedent(help) 102 | 103 | return process_help_section 104 | -------------------------------------------------------------------------------- /test/executors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/executors/__init__.py -------------------------------------------------------------------------------- /test/executors/test_docker.py: 
-------------------------------------------------------------------------------- 1 | import docker 2 | import pytest 3 | 4 | from aclimatise.execution.docker import DockerExecutor 5 | 6 | 7 | @pytest.mark.timeout(360) 8 | def test_docker_image_saved(bwamem_help): 9 | client = docker.from_env() 10 | container = client.containers.run( 11 | "biocontainers/bwa:v0.7.17_cv1", 12 | entrypoint=["sleep", "999999999"], 13 | detach=True, 14 | ) 15 | 16 | exec = DockerExecutor(container) 17 | cmd = exec.convert(["bwa", "mem"]) 18 | assert cmd.docker_image == "biocontainers/bwa:v0.7.17_cv1" 19 | 20 | 21 | def test_docker(bwamem_help): 22 | client = docker.from_env() 23 | container = client.containers.run( 24 | "biocontainers/bwa:v0.7.17_cv1", 25 | entrypoint=["sleep", "999999999"], 26 | detach=True, 27 | ) 28 | 29 | exec = DockerExecutor(container) 30 | output = exec.execute(["bwa", "mem"]) 31 | assert output == bwamem_help 32 | container.kill() 33 | 34 | 35 | @pytest.mark.timeout(360) 36 | def test_docker_kill(): 37 | """ 38 | Test that the DockerExecutor can kill the command if it times out 39 | """ 40 | client = docker.from_env(timeout=99999) 41 | container = client.containers.run( 42 | "ubuntu:latest", 43 | entrypoint=["sleep", "999999999"], 44 | detach=True, 45 | ) 46 | 47 | exec = DockerExecutor(container) 48 | output = exec.execute(["sleep", "999999"]) 49 | container.kill() 50 | assert output == "" 51 | 52 | 53 | def test_no_output(): 54 | # Check that it doesn't crash when no output is received 55 | 56 | client = docker.from_env() 57 | container = client.containers.run( 58 | "quay.io/biocontainers/gadem:1.3.1--h516909a_2", 59 | entrypoint=["sleep", "9999999"], 60 | detach=True, 61 | ) 62 | exec = DockerExecutor(container) 63 | output = exec.execute(["gadem"]) 64 | container.kill() 65 | assert output is not None 66 | 67 | 68 | @pytest.mark.timeout(360) 69 | def test_infinite_output(): 70 | """ 71 | Test that the DockerExecutor can kill the command if it's constantly 
producing output 72 | """ 73 | client = docker.from_env(timeout=99999) 74 | container = client.containers.run( 75 | "ubuntu:latest", 76 | entrypoint=["sleep", "999999999"], 77 | detach=True, 78 | ) 79 | 80 | exec = DockerExecutor(container) 81 | output = exec.execute(["yes"]) 82 | container.kill() 83 | assert output.startswith("y") 84 | -------------------------------------------------------------------------------- /test/executors/test_local.py: -------------------------------------------------------------------------------- 1 | from aclimatise.execution.local import LocalExecutor 2 | 3 | from ..util import skip_not_installed 4 | 5 | 6 | @skip_not_installed("bwa") 7 | def test_local(bwamem_help): 8 | exec = LocalExecutor() 9 | output = exec.execute(["bwa", "mem"]) 10 | assert output == bwamem_help 11 | -------------------------------------------------------------------------------- /test/executors/test_man.py: -------------------------------------------------------------------------------- 1 | from test.util import skip_not_installed 2 | 3 | from aclimatise.execution.man import ManPageExecutor 4 | 5 | 6 | @skip_not_installed("git") 7 | @skip_not_installed("man") 8 | def test_git(): 9 | cmd = ManPageExecutor(max_length=99999).explore( 10 | ["git"], 11 | ) 12 | assert len(cmd.positional) > 20 13 | 14 | 15 | @skip_not_installed("git") 16 | @skip_not_installed("ls") 17 | def test_ls(): 18 | cmd = ManPageExecutor().explore( 19 | ["ls"], 20 | ) 21 | assert {"-A", "--almost-all", "-1", "--context"} <= cmd.all_synonyms 22 | -------------------------------------------------------------------------------- /test/flags/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from aclimatise.flag_parser.parser import CliParser 4 | 5 | 6 | @pytest.fixture 7 | def parser(): 8 | return CliParser() 9 | -------------------------------------------------------------------------------- /test/flags/test_bedtools.py: 
-------------------------------------------------------------------------------- 1 | def test_bedtools_block(parser, process): 2 | txt = """ 3 | [ Multi-way file comparisons ] 4 | multiinter Identifies common intervals among multiple interval files. 5 | unionbedg Combines coverage intervals from multiple BEDGRAPH files. 6 | 7 | [ Paired-end manipulation ] 8 | """ 9 | blocks = parser.flags.searchString(txt) 10 | assert len(blocks) == 1, "This comprises only one block of flags" 11 | assert len(blocks[0]) == 2, "The single block contains 2 positional arguments" 12 | 13 | 14 | def test_bedtools_root(parser, bedtools_help): 15 | command = parser.parse_command(bedtools_help, ["bedtools"]) 16 | assert len(command.named) == 1 17 | assert len(command.positional) == 43 18 | -------------------------------------------------------------------------------- /test/flags/test_bwa.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from textwrap import dedent 3 | 4 | import pytest 5 | 6 | from aclimatise.flag_parser import elements 7 | from aclimatise.integration import parse_help 8 | from aclimatise.model import Flag, FlagSynonym, OptionalFlagArg 9 | 10 | 11 | def test_flag_arg(parser): 12 | result = elements.flag_with_arg.parseString("-A INT")[0] 13 | assert isinstance(result, FlagSynonym) 14 | assert result.argtype.name == "INT" 15 | assert result.name == "-A" 16 | 17 | 18 | def test_flag(parser): 19 | result = parser.flag.parseString( 20 | "-A INT score for a sequence match, which scales options -TdBOELU unless overridden [1]" 21 | )[0] 22 | assert isinstance(result, Flag) 23 | assert result.synonyms[0] == "-A" 24 | assert result.args.name == "INT" 25 | 26 | 27 | def test_flag_b(parser): 28 | result = parser.flag.parseString("-B INT penalty for a mismatch [4]") 29 | print(result) 30 | 31 | 32 | def test_multiarg_flag(parser): 33 | result = parser.flag.parseString( 34 | "-O INT[,INT] gap open penalties for deletions and 
insertions [6,6]" 35 | )[0] 36 | assert isinstance(result, Flag) 37 | 38 | 39 | def test_flags(parser): 40 | result = parser.flags.parseString( 41 | """ 42 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1] 43 | -B INT penalty for a mismatch [4] 44 | """, 45 | parseAll=True, 46 | ) 47 | 48 | 49 | def test_bwa_segmented_options(parser): 50 | result = parser.flag_block.parseString( 51 | """ 52 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1] 53 | -B INT penalty for a mismatch [4] 54 | -O INT[,INT] gap open penalties for deletions and insertions [6,6] 55 | -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1] 56 | -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5] 57 | -U INT penalty for an unpaired read pair [17] 58 | 59 | -x STR read type. Setting -x changes multiple parameters unless overriden [null] 60 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) 61 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) 62 | intractg: -B9 -O16 -L5 (intra-species contigs to ref) 63 | """, 64 | parseAll=True, 65 | ) 66 | assert len(result) == 7 67 | 68 | 69 | def test_bwa_help_part(parser): 70 | results = list( 71 | parser.flags.scanString( 72 | """ 73 | Algorithm options: 74 | 75 | -t INT number of threads [1] 76 | -k INT minimum seed length [19] 77 | -w INT band width for banded alignment [100] 78 | -d INT off-diagonal X-dropoff [100] 79 | -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] 80 | -y INT seed occurrence for the 3rd round seeding [20] 81 | -c INT skip seeds with more than INT occurrences [500] 82 | -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] 83 | -W INT discard a chain if seeded bases shorter than INT [0] 84 | -m INT perform at most INT rounds of mate rescues for each read [50] 85 | -S skip mate rescue 86 | -P skip pairing; mate rescue performed 
unless -S also in use 87 | """ 88 | ) 89 | ) 90 | assert len(results) == 1 91 | 92 | for tokens, start, end in results: 93 | assert len(tokens) == 12 94 | 95 | 96 | def test_bwa_multisection(parser): 97 | s = """ 98 | Scoring options: 99 | 100 | -x STR read type. Setting -x changes multiple parameters unless overriden [null] 101 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) 102 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) 103 | intractg: -B9 -O16 -L5 (intra-species contigs to ref) 104 | 105 | Input/output options: 106 | 107 | -p smart pairing (ignoring in2.fq) 108 | """ 109 | result_lists = list(parser.flags.scanString(s)) 110 | assert len(result_lists) == 2 111 | for result_list, b, c in result_lists: 112 | assert len(result_list) == 1 113 | 114 | 115 | def test_complex_optionals(parser): 116 | s = """ 117 | -I FLOAT[,FLOAT[,INT[,INT]]] 118 | specify the mean, standard deviation (10% of the mean if absent), max 119 | (4 sigma from the mean if absent) and min of the insert size distribution. 120 | FR orientation only. [inferred] 121 | """ 122 | results = list(parser.flag_block.parseString(s))[0] 123 | assert isinstance(results, Flag) 124 | assert isinstance(results.args, OptionalFlagArg) 125 | assert results.args.names == ["FLOAT", "FLOAT", "INT", "INT"] 126 | 127 | 128 | def test_bwa_skipping(parser): 129 | s = """ 130 | Input/output options: 131 | 132 | -p smart pairing (ignoring in2.fq) 133 | -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null] 134 | -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null] 135 | -o FILE sam file to output results to [stdout] 136 | -j treat ALT contigs as part of the primary assembly (i.e. 
ignore .alt file) 137 | -5 for split alignment, take the alignment with the smallest coordinate as primary 138 | """ 139 | cmd = parser.parse_command(cmd=s, name=["bwa", "mem"]) 140 | assert len(cmd.named) == 6 141 | 142 | 143 | def test_bwa_root(bwa_help): 144 | command = parse_help(["bwa"], bwa_help) 145 | assert len(command.named) == 0 146 | assert len(command.positional) == 14 147 | assert command.positional[0].name == "index" 148 | assert command.positional[-1].name == "bwt2sa" 149 | 150 | 151 | def test_bwa(parser, bwamem_help): 152 | # Parse help 153 | command = parse_help(["bwa", "mem"], text=bwamem_help) 154 | 155 | assert len(command.named) == 36 156 | assert len(command.positional) == 3 157 | -------------------------------------------------------------------------------- /test/flags/test_bwakit.py: -------------------------------------------------------------------------------- 1 | def test_single_flag(parser): 2 | 3 | txt = """ 4 | --use_strict (enforce strict mode) 5 | type: bool default: false 6 | """ 7 | 8 | result = parser.flag_block.parseString(txt)[0] 9 | assert "type: bool" in result.description 10 | 11 | 12 | def test_multiple_flags(parser): 13 | 14 | txt = """ 15 | --use_strict (enforce strict mode) 16 | type: bool default: false 17 | --es5_readonly (activate correct semantics for inheriting readonliness) 18 | type: bool default: true 19 | """ 20 | 21 | result = parser.flag_block.setDebug().parseString(txt) 22 | assert len(result) == 2 23 | -------------------------------------------------------------------------------- /test/flags/test_gth.py: -------------------------------------------------------------------------------- 1 | from aclimatise.model import Flag 2 | 3 | 4 | def test_unindented_flags(parser): 5 | """ 6 | Verify that we can parse blocks of flags that aren't intended (which is unusual) 7 | """ 8 | 9 | text = """ 10 | -genomic specify input files containing genomic sequences 11 | mandatory option 12 | -cdna specify input files 
containing cDNA/EST sequences 13 | -protein specify input files containing protein sequences 14 | """.strip() 15 | flags = parser.flags.parseString(text) 16 | assert len(flags) == 3 17 | for flag in flags: 18 | assert isinstance(flag, Flag) 19 | -------------------------------------------------------------------------------- /test/flags/test_htseq.py: -------------------------------------------------------------------------------- 1 | """ 2 | Uses htseq-count, which is used as an example of a Python argparse CLI 3 | """ 4 | import shutil 5 | from textwrap import dedent 6 | 7 | import pytest 8 | 9 | from aclimatise.flag_parser import elements 10 | from aclimatise.model import EmptyFlagArg, FlagSynonym, RepeatFlagArg 11 | 12 | 13 | def test_short(parser): 14 | flag = elements.flag_with_arg.parseString( 15 | dedent( 16 | """ 17 | -i IDATTR 18 | """ 19 | ) 20 | )[0] 21 | assert isinstance(flag, FlagSynonym) 22 | 23 | 24 | def test_long_short_synonyms(parser): 25 | flag = elements.flag_synonyms.parseString( 26 | dedent( 27 | """ 28 | -i IDATTR, --idattr IDATTR 29 | """ 30 | ) 31 | )[0] 32 | print(flag) 33 | 34 | 35 | def test_long_short_desc(parser): 36 | flag = parser.flag_block.parseString( 37 | """ 38 | -i IDATTR, --idattr IDATTR 39 | GFF attribute to be used as feature ID (default, 40 | suitable for Ensembl GTF files: gene_id) 41 | """ 42 | )[0] 43 | print(flag) 44 | 45 | 46 | def test_long_short_choices(parser): 47 | flag = parser.flag_block.parseString( 48 | """ 49 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty} 50 | mode to handle reads overlapping more than one feature 51 | (choices: union, intersection-strict, intersection- 52 | nonempty; default: union) 53 | """ 54 | ) 55 | 56 | 57 | def test_help_section_preamble(parser): 58 | flags = list( 59 | parser.flags.searchString( 60 | dedent( 61 | """ 62 | optional arguments: 63 | -h, --help show this help message and exit 64 | -f {sam,bam}, --format 
{sam,bam} 65 | type of data, either 'sam' or 'bam' 66 | (default: sam) 67 | """ 68 | ) 69 | ) 70 | )[0] 71 | assert len(flags) == 2 72 | 73 | 74 | def test_repeat_type(parser): 75 | flag = elements.flag_synonyms.parseString( 76 | "--additional-attr ADDITIONAL_ATTR [ADDITIONAL_ATTR ...]" 77 | )[0] 78 | assert flag.name == "--additional-attr" 79 | assert isinstance(flag.argtype, RepeatFlagArg) 80 | assert flag.argtype.name == "ADDITIONAL_ATTR" 81 | 82 | 83 | def test_full_flags(parser): 84 | results = parser.flags.parseString( 85 | """ 86 | -h, --help show this help message and exit 87 | -f {sam,bam}, --format {sam,bam} 88 | type of data, either 'sam' or 'bam' 89 | (default: sam) 90 | -r {pos,name}, --order {pos,name} 91 | 'pos' or 'name'. Sorting order of 92 | (default: name). Paired-end sequencing data must be 93 | sorted either by position or by read name, and the 94 | sorting order must be specified. Ignored for single- 95 | end data. 96 | --max-reads-in-buffer MAX_BUFFER_SIZE 97 | When is paired end sorted by 98 | position, allow only so many reads to stay in memory 99 | until the mates are found (raising this number will 100 | use more memory). Has no effect for single end or 101 | paired end sorted by name 102 | -s {yes,no,reverse}, --stranded {yes,no,reverse} 103 | whether the data is from a strand-specific assay. 104 | Specify 'yes', 'no', or 'reverse' (default: yes). 
105 | 'reverse' means 'yes' with reversed strand 106 | interpretation 107 | -a MINAQUAL, --minaqual MINAQUAL 108 | skip all reads with alignment quality lower than the 109 | given minimum value (default: 10) 110 | -t FEATURETYPE, --type FEATURETYPE 111 | feature type (3rd column in GFF file) to be used, all 112 | features of other type are ignored (default, suitable 113 | for Ensembl GTF files: exon) 114 | -i IDATTR, --idattr IDATTR 115 | GFF attribute to be used as feature ID (default, 116 | suitable for Ensembl GTF files: gene_id) 117 | --additional-attr ADDITIONAL_ATTR 118 | Additional feature attributes (default: none, suitable 119 | for Ensembl GTF files: gene_name). Use multiple times 120 | for each different attribute 121 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty} 122 | mode to handle reads overlapping more than one feature 123 | (choices: union, intersection-strict, intersection- 124 | nonempty; default: union) 125 | --nonunique {none,all} 126 | Whether to score reads that are not uniquely aligned 127 | or ambiguously assigned to features 128 | --secondary-alignments {score,ignore} 129 | Whether to score secondary alignments (0x100 flag) 130 | --supplementary-alignments {score,ignore} 131 | Whether to score supplementary alignments (0x800 flag) 132 | -o SAMOUTS, --samout SAMOUTS 133 | write out all SAM alignment records into SAM files 134 | (one per input file needed), annotating each line with 135 | its feature assignment (as an optional field with tag 136 | 'XF') 137 | -q, --quiet suppress progress report 138 | """ 139 | ) 140 | assert len(list(results)) == 15 141 | 142 | 143 | def test_choice(parser): 144 | flag = elements.flag_with_arg.parseString("--format {sam,bam}")[0] 145 | assert flag.name == "--format" 146 | 147 | # Both sets should be the same 148 | assert len(flag.argtype.choices & {"sam", "bam"}) == 2 149 | 150 | 151 | def test_noarg(parser): 152 | flag = 
parser.flag.parseString("-q, --quiet suppress progress report")[0] 153 | assert flag.longest_synonym == "--quiet" 154 | assert len(flag.synonyms) == 2 155 | assert isinstance(flag.args, EmptyFlagArg) 156 | 157 | 158 | @pytest.mark.skipif( 159 | not shutil.which("htseq-count"), reason="htseq-count is not installed" 160 | ) 161 | def test_full(parser, local_executor): 162 | # Parse help 163 | help_text = local_executor.execute(["htseq-count", "--help"]) 164 | flag_sections = parser.flags.searchString(help_text) 165 | # There is one section for positional arguments and one for named arguments 166 | assert len(flag_sections) == 2 167 | # There are two positional arguments 168 | assert len(flag_sections[0]) == 2 169 | # There are at least 15 flags 170 | assert len(flag_sections[1]) >= 15 171 | -------------------------------------------------------------------------------- /test/flags/test_pisces.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from pkg_resources import resource_filename 4 | 5 | from aclimatise.flag_parser import elements 6 | from aclimatise.flag_parser.parser import CliParser 7 | from aclimatise.model import SimpleFlagArg 8 | 9 | 10 | def test_pisces_flag(parser): 11 | cmd = """ 12 | --targetlodfrequency, --targetvf 13 | """ 14 | flag_synonyms = elements.flag_synonyms.parseString(cmd) 15 | # There is one section for positional arguments and one for named arguments 16 | assert len(flag_synonyms) == 2 17 | assert isinstance(flag_synonyms[1].argtype, SimpleFlagArg) 18 | assert flag_synonyms[1].argtype.name == "FLOAT" 19 | 20 | 21 | def test_pisces_arg(parser): 22 | cmd = """ 23 | --targetlodfrequency, --targetvf 24 | FLOAT Target Frequency to call a variant. Ie, to 25 | target a 5% allele frequency, we must call down 26 | to 2.6%, to capture that 5% allele 95% of the 27 | time. 
This parameter is used by the Somatic 28 | Genotyping Model 29 | """ 30 | flag = parser.flag_block.parseString(cmd)[0] 31 | 32 | assert len(flag.synonyms) == 2 33 | assert flag.description.startswith("FLOAT Target Frequency") 34 | assert flag.args.name == "FLOAT" 35 | 36 | 37 | def test_pisces_arg_2(parser): 38 | cmd = """ 39 | --vqfilter, --variantqualityfilter 40 | INT FilteredVariantQScore to report variant as 41 | filtered 42 | """ 43 | flag = parser.flag_block.parseString(cmd)[0] 44 | 45 | assert len(flag.synonyms) == 2 46 | assert flag.description.startswith("INT FilteredVariantQScore ") 47 | assert flag.args.name == "INT" 48 | 49 | 50 | def test_pisces_indent_dedent(parser): 51 | cmd = """ 52 | -i, --intervalpaths 53 | PATHS IntervalPath(s), single value or comma 54 | delimited list corresponding to BAMPath(s). At 55 | most one value should be provided if BAM folder 56 | is specified 57 | --coveragemethod 58 | STRING'approximate' or 'exact'. Exact is more 59 | precise but requires more memory (minimum 8 GB). 60 | Default approximate 61 | --baselogname STRING 62 | -d, --debug BOOL 63 | --usestitchedxd BOOL Set to true to make use of the consensus 64 | read-direction information (the XD tag) from 65 | stitched reads. This is on by default when using 66 | Stitcher output bam, but must be deliberately 67 | set for Gemini output. 
68 | """ 69 | flags = parser.flag_block.parseString(cmd) 70 | 71 | assert len(flags) == 5 72 | 73 | assert isinstance(flags[0].args, SimpleFlagArg) 74 | assert flags[0].synonyms == ["-i", "--intervalpaths"] 75 | 76 | assert isinstance(flags[3].args, SimpleFlagArg) 77 | assert flags[3].synonyms == ["-d", "--debug"] 78 | assert flags[3].description == "BOOL" 79 | 80 | 81 | def test_pisces_triple_long_flag_synonyms(parser): 82 | cmd = "--minvf, --minimumvariantfrequency, --minimumfrequency " 83 | synonyms = elements.flag_synonyms.parseString(cmd) 84 | 85 | assert len(synonyms) == 3 86 | 87 | 88 | def test_pisces_triple_long_flag(parser): 89 | cmd = """ 90 | --minvf, --minimumvariantfrequency, --minimumfrequency 91 | FLOAT MinimumFrequency to call a variant 92 | """ 93 | flag = parser.flag_block.parseString(cmd)[0] 94 | 95 | assert len(flag.synonyms) == 3 96 | assert flag.description.startswith("FLOAT MinimumFrequency") 97 | 98 | 99 | def test_pisces_quad_flag_synonyms(parser): 100 | cmd = "-c, --mindp, --mindepth, --mincoverage " 101 | synonyms = elements.flag_synonyms.parseString(cmd) 102 | 103 | assert len(synonyms) == 4 104 | 105 | 106 | def test_pisces_quad_flag(parser): 107 | cmd = """ 108 | -c, --mindp, --mindepth, --mincoverage 109 | INT Minimum depth to call a variant 110 | """ 111 | flag = parser.flag_block.parseString(cmd)[0] 112 | 113 | assert len(flag.synonyms) == 4 114 | assert flag.description.startswith("INT Minimum") 115 | 116 | 117 | def test_pisces_multi_indent(parser): 118 | cmd = """ 119 | --minvq, --minvariantqscore 120 | INT MinimumVariantQScore to report variant 121 | -c, --mindp, --mindepth, --mincoverage 122 | INT Minimum depth to call a variant 123 | --minvf, --minimumvariantfrequency, --minimumfrequency 124 | FLOAT MinimumFrequency to call a variant 125 | --targetlodfrequency, --targetvf 126 | FLOAT Target Frequency to call a variant. 
Ie, to 127 | target a 5% allele frequency, we must call down 128 | to 2.6%, to capture that 5% allele 95% of the 129 | time. This parameter is used by the Somatic 130 | Genotyping Model 131 | --vqfilter, --variantqualityfilter 132 | INT FilteredVariantQScore to report variant as 133 | filtered 134 | 135 | """ 136 | flags = parser.flags.parseString(cmd) 137 | 138 | assert len(flags) == 5 139 | 140 | 141 | def test_pisces(parser, pisces_help): 142 | # Parse help 143 | flag_sections = parser.flags.searchString(pisces_help) 144 | # There is one section for positional arguments and one for named arguments 145 | assert len(flag_sections) == 5 146 | 147 | # There are two arguments in the first block 148 | assert len(flag_sections[0]) == 2 149 | 150 | # There are 23 arguments in the second block 151 | assert len(flag_sections[1]) == 24 152 | 153 | # There are 4 arguments in the third block 154 | assert len(flag_sections[2]) == 4 155 | 156 | # There are 23 arguments in the fourth block 157 | assert len(flag_sections[3]) == 23 158 | 159 | # There are 6 arguments in the fifth block 160 | assert len(flag_sections[4]) == 6 161 | 162 | # The very first argument has 3 synonyms 163 | assert len(flag_sections[0][0].synonyms) == 3 164 | -------------------------------------------------------------------------------- /test/flags/test_podchecker.py: -------------------------------------------------------------------------------- 1 | from aclimatise.model import Flag 2 | 3 | 4 | def test_podchecker_flags(parser): 5 | cmd = """ 6 | -warnings -nowarnings 7 | Turn on/off printing of warnings. Repeating -warnings increases 8 | the warning level, i.e. more warnings are printed. Currently 9 | increasing to level two causes flagging of unescaped "<,>" 10 | characters. 
11 | """ 12 | flag = parser.flag_block.parseString(cmd) 13 | assert isinstance(flag[0], Flag) 14 | assert len(flag[0].synonyms) == 2 15 | 16 | 17 | def test_podchecker(podchecker_help, parser): 18 | cmd = """ 19 | Options and Arguments: 20 | -help Print a brief help message and exit. 21 | 22 | -man Print the manual page and exit. 23 | 24 | -warnings -nowarnings 25 | Turn on/off printing of warnings. Repeating -warnings increases 26 | the warning level, i.e. more warnings are printed. Currently 27 | increasing to level two causes flagging of unescaped "<,>" 28 | characters. 29 | 30 | file The pathname of a POD file to syntax-check (defaults to standard 31 | input). 32 | """ 33 | flags = parser.flags.searchString(cmd)[0] 34 | assert len(flags) == 4 35 | -------------------------------------------------------------------------------- /test/flags/test_samtools.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import pytest 4 | 5 | from aclimatise.model import Flag 6 | 7 | 8 | def test_samtools_bedcov_j(parser): 9 | text = """ 10 | -j do not include deletions (D) and ref skips (N) in bedcov computation 11 | """ 12 | flag = parser.flag_block.parseString(text)[0] 13 | assert isinstance(flag, Flag) 14 | assert flag.synonyms[0] == "-j" 15 | 16 | 17 | def test_samtools_bedcov_qjfmt(parser): 18 | text = """ 19 | -Q mapping quality threshold [0] 20 | -j do not include deletions (D) and ref skips (N) in bedcov computation 21 | --input-fmt-option OPT[=VAL] 22 | Specify a single input file format option in the form 23 | of OPTION or OPTION=VALUE 24 | """ 25 | flags = list(parser.flags.setDebug().searchString(text)[0]) 26 | assert len(flags) == 3 27 | 28 | 29 | def test_samtools(parser, samtools_help): 30 | # Parse the root samtools command 31 | samtools = parser.parse_command(name=["samtools"], cmd=samtools_help) 32 | assert len(samtools.named) == 0 33 | assert len(samtools.positional) > 25 34 | 35 | 36 | 
@pytest.mark.skipif(not shutil.which("samtools"), reason="samtools is not installed") 37 | def test_samtools_index(parser, local_executor): 38 | # Parse help 39 | help_text = local_executor.execute(["samtools", "index"]) 40 | flag_sections = parser.flags.searchString(help_text) 41 | # There is one section for positional arguments and one for named arguments 42 | assert len(flag_sections) == 1 43 | # There are two positional arguments 44 | assert len(flag_sections[0]) == 4 45 | -------------------------------------------------------------------------------- /test/flags/test_singularity.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import pytest 4 | 5 | from aclimatise.flag_parser.parser import CliParser 6 | 7 | 8 | def test_singularity_style_flags(parser): 9 | flag = parser.flag_block.parseString( 10 | " -n|--name Specify a custom container name (first priority)" 11 | )[0] 12 | assert len(flag.synonyms) == 2 13 | assert flag.synonyms == ["-n", "--name"] 14 | 15 | 16 | @pytest.mark.skipif( 17 | not shutil.which("singularity"), reason="singularity is not installed" 18 | ) 19 | def test_singularity_pull(local_executor): 20 | parser = CliParser(parse_positionals=False) 21 | 22 | # Parse help 23 | help_text = local_executor.execute(["singularity", "pull", "--help"]) 24 | flag_sections = parser.flags.searchString(help_text) 25 | # There is one section for positional arguments and one for named arguments 26 | assert len(flag_sections) == 1 27 | # There are two positional arguments 28 | assert len(flag_sections[0]) >= 5 29 | -------------------------------------------------------------------------------- /test/name_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aCLImatise/CliHelpParser/f573543d61f2be487063d3517c199cf3a80cbe53/test/name_generation/__init__.py 
-------------------------------------------------------------------------------- /test/name_generation/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from aclimatise.converter import WrapperGenerator 4 | 5 | 6 | @pytest.fixture() 7 | def snake_gen(): 8 | return WrapperGenerator(case="snake", generate_names=True) 9 | 10 | 11 | @pytest.fixture() 12 | def camel_gen(): 13 | return WrapperGenerator(case="camel", generate_names=True) 14 | 15 | 16 | @pytest.fixture() 17 | def gen(): 18 | return WrapperGenerator() 19 | -------------------------------------------------------------------------------- /test/name_generation/test_batch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test all the test data files 3 | """ 4 | import pytest 5 | from pkg_resources import resource_filename 6 | 7 | from aclimatise import WrapperGenerator, parse_help 8 | 9 | from ..util import HelpText, all_tests, convert_validate, validate_cwl, validate_wdl 10 | 11 | 12 | @pytest.mark.parametrize("test", all_tests) 13 | def test_all(test: HelpText): 14 | """ 15 | Tests that generate_names can work on real-life Commands without exceeding reasonable system resources 16 | """ 17 | with open(resource_filename("test", test.path)) as fp: 18 | help_text = fp.read() 19 | 20 | cmd = parse_help(test.cmd, help_text) 21 | 22 | WrapperGenerator().choose_variable_names([*cmd.positional, *cmd.named]) 23 | -------------------------------------------------------------------------------- /test/name_generation/test_case.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the casing (snake_case vs camelCase) used by the converters 3 | """ 4 | import pytest 5 | 6 | from aclimatise.converter import WrapperGenerator 7 | from aclimatise.model import EmptyFlagArg, Flag 8 | 9 | 10 | def test_camel_short(camel_gen): 11 | flag = Flag( 12 | synonyms=["-t"], 
description="number of threads [1]", args=EmptyFlagArg() 13 | ) 14 | names = camel_gen.choose_variable_names([flag], length=3) 15 | assert names[0].name == "numberOfThreads" 16 | 17 | 18 | def test_snake_short(snake_gen): 19 | flag = Flag( 20 | synonyms=["-t"], description="number of threads [1]", args=EmptyFlagArg() 21 | ) 22 | names = snake_gen.choose_variable_names([flag], length=2) 23 | assert "number" in names[0].name 24 | assert "threads" in names[0].name 25 | 26 | 27 | def test_camel_long(camel_gen): 28 | flag = Flag( 29 | synonyms=["-g", "--genomepaths", "--genomefolders"], 30 | description="number of threads [1]", 31 | args=EmptyFlagArg(), 32 | ) 33 | names = camel_gen.choose_variable_names([flag], length=2) 34 | assert names[0].name == "genomeFolders" 35 | 36 | 37 | def test_snake_long(snake_gen): 38 | flag = Flag( 39 | synonyms=["-g", "--genomepaths", "--genomefolders"], 40 | description="number of threads [1]", 41 | args=EmptyFlagArg(), 42 | ) 43 | names = snake_gen.choose_variable_names([flag], length=2) 44 | assert names[0].name == "genome_folders" 45 | -------------------------------------------------------------------------------- /test/name_generation/test_description.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests the generate_name function, which converts a paragraph of text into a variable name 3 | """ 4 | from aclimatise.name_generation import generate_name, preprocess 5 | 6 | 7 | def test_bwa_mem_t(): 8 | name = next(generate_name(preprocess("number of threads [1]"))) 9 | assert len(name) < 5 10 | assert "number" in name 11 | assert "threads" in name 12 | 13 | 14 | def test_bwa_mem_p(): 15 | name = next(generate_name(preprocess("smart pairing (ignoring in2.fq)"))) 16 | assert len(name) <= 3 17 | assert "smart" in name 18 | assert "pairing" in name 19 | 20 | 21 | def test_bwa_mem_r(): 22 | name = next( 23 | generate_name( 24 | preprocess("read group header line such as '@RG\tID:foo\tSM:bar' 
[null]") 25 | ) 26 | ) 27 | assert len(name) < 5 28 | assert "read" in name 29 | # assert 'header' in name 30 | 31 | 32 | def test_bwa_mem_i(): 33 | name = next( 34 | generate_name( 35 | preprocess( 36 | "specify the mean, standard deviation (10% of the mean if absent), max (4 sigma from the mean if absent) and min of the insert size distribution. FR orientation only. [inferred]" 37 | ) 38 | ) 39 | ) 40 | assert len(name) < 5 41 | assert "specify" in name 42 | 43 | # Ideally this would return "mean" first, but the POS engine thinks that "mean" describes "deviation" 44 | # assert "mean" in name 45 | assert "deviation" in name 46 | 47 | 48 | def test_bedtools_coverage_d(): 49 | name = next( 50 | generate_name( 51 | preprocess( 52 | "Report the depth at each position in each A feature. Positions reported are one based. Each position and depth follow the complete A feature." 53 | ) 54 | ) 55 | ) 56 | assert len(name) < 5 57 | assert "report" in name 58 | assert "depth" in name 59 | 60 | 61 | def test_bedtools_coverage_s(): 62 | name = next( 63 | generate_name( 64 | preprocess( 65 | "Require same strandedness. That is, only report hits in B that overlap A on the _same_ strand. By default, overlaps are reported without respect to strand" 66 | ) 67 | ) 68 | ) 69 | assert len(name) < 5 70 | assert "require" in name 71 | assert "strandedness" in name 72 | 73 | 74 | def test_bedtools_coverage_g(): 75 | name = next( 76 | generate_name( 77 | preprocess( 78 | "Provide a genome file to enforce consistent chromosome sort order across input files. Only applies when used with -sorted option." 
79 | ) 80 | ) 81 | ) 82 | assert len(name) < 5 83 | assert "provide" in name 84 | assert "file" in name 85 | 86 | 87 | def test_symbol(): 88 | """ 89 | Check that symbols are correctly removed from the output 90 | """ 91 | name = next(generate_name(preprocess("/genome@ #file$"))) 92 | assert len(name) < 5 93 | assert "genome" in name 94 | assert "file" in name 95 | 96 | 97 | def test_hyphens(): 98 | name = next(generate_name(preprocess("penalty for 5'- and 3'-end clipping [5,5]"))) 99 | assert len(name) < 5 100 | assert "penalty" in name 101 | 102 | for word in name: 103 | assert "-" not in word 104 | assert "[" not in word 105 | assert "," not in word 106 | -------------------------------------------------------------------------------- /test/name_generation/test_group.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests certain groups of flags used together 3 | """ 4 | from aclimatise.converter import WrapperGenerator 5 | from aclimatise.model import EmptyFlagArg, Flag, SimpleFlagArg 6 | 7 | 8 | def test_bedtools_window_sm(): 9 | """ 10 | These two flags have almost the same name, and almost the same description 11 | """ 12 | flags = [ 13 | Flag( 14 | synonyms=["-sm"], 15 | description="Only report hits in B that overlap A on the _same_ strand.", 16 | args=EmptyFlagArg(), 17 | ), 18 | Flag( 19 | synonyms=["-sm"], 20 | description="Only report hits in B that overlap A on the _opposite_ strand.", 21 | args=EmptyFlagArg(), 22 | ), 23 | Flag( 24 | synonyms=["-c"], 25 | description="For each entry in A, report the number of overlaps with B.", 26 | args=EmptyFlagArg(), 27 | ), 28 | ] 29 | args = WrapperGenerator().choose_variable_names(flags) 30 | assert len(set([arg.name for arg in args])) == 3 31 | 32 | 33 | def test_same_description(): 34 | """ 35 | Normally we ignore one-character flag names, and instead try to read their descriptions for a more informative name. 
36 | However, if the descriptions are identical to each other, we have to fall back to the description 37 | """ 38 | flags = [ 39 | Flag( 40 | synonyms=["-a"], 41 | description="Makes the program do a certain thing", 42 | args=EmptyFlagArg(), 43 | ), 44 | Flag( 45 | synonyms=["-b"], 46 | description="Makes the program do a certain thing", 47 | args=EmptyFlagArg(), 48 | ), 49 | ] 50 | names = WrapperGenerator().choose_variable_names(flags) 51 | assert names[0].name == "a" 52 | assert names[1].name == "b" 53 | 54 | 55 | def test_same_arg(): 56 | """ 57 | Normally we ignore one-character flag names, and instead try to read their descriptions for a more informative name. 58 | However, if the descriptions are identical to each other, we have to fall back to the description 59 | """ 60 | flags = [ 61 | Flag(synonyms=["-a"], description="", args=SimpleFlagArg("SomeThing")), 62 | Flag(synonyms=["-b"], description="", args=SimpleFlagArg("SomeThing")), 63 | ] 64 | names = WrapperGenerator().choose_variable_names(flags) 65 | assert names[0].name == "a" 66 | assert names[1].name == "b" 67 | -------------------------------------------------------------------------------- /test/name_generation/test_single_flag.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for the name generation of single flags 3 | """ 4 | from aclimatise.converter.wdl import WdlGenerator 5 | from aclimatise.model import EmptyFlagArg, Flag, Positional, SimpleFlagArg 6 | 7 | 8 | def test_samtools_dict_output(): 9 | gen = WdlGenerator() 10 | arg = Flag( 11 | synonyms=["-o", "--output"], 12 | description="file to write out dict file [stdout]", 13 | args=SimpleFlagArg(name="str"), 14 | ) 15 | name = gen.choose_variable_names([arg])[0].name 16 | # The WDL converter should avoid naming a variable "output" since that's a WDL keyword 17 | assert name != "output" 18 | 19 | # Also, since we have a description, the generator shouldn't choose the lazy option of 
var_output 20 | assert name != "var_output" 21 | 22 | 23 | def test_samtools_quickcheck_output(): 24 | gen = WdlGenerator() 25 | arg = Positional(description="", position=0, name="input") 26 | name = gen.choose_variable_names([arg])[0].name 27 | # The WDL converter should avoid naming a variable "output" since that's a WDL keyword 28 | assert name != "input" 29 | 30 | 31 | def test_bwt2sa_i(gen): 32 | arg = Flag(synonyms=["-i"], description="", args=SimpleFlagArg(name="32")) 33 | 34 | name = gen.choose_variable_names([arg])[0].name 35 | # 32 isn't a valid variable name, so the only option here is to use the letter i 36 | assert name == "i" 37 | 38 | 39 | def test_name_to_words_symbol(gen): 40 | """ 41 | Check that we can get an argument name even if the argument's flag is a symbol 42 | """ 43 | arg = Flag( 44 | synonyms=["-@"], 45 | description="Number of additional threads to use", 46 | args=EmptyFlagArg(), 47 | ) 48 | 49 | name = gen.choose_variable_names([arg])[0].name 50 | assert name == "at" 51 | 52 | 53 | def test_name_to_words(gen): 54 | """ 55 | Check that we can get an argument name even if the argument's flag is a symbol 56 | """ 57 | arg = Flag( 58 | synonyms=["--genomepaths"], 59 | description="", 60 | args=EmptyFlagArg(), 61 | ) 62 | 63 | name = gen.choose_variable_names([arg])[0].name 64 | assert "genome" in name 65 | assert "paths" in name 66 | # assert list(arg._name_from_name()) == ["genome", "paths"] 67 | 68 | 69 | def test_bwa_mem_infq(gen): 70 | arg = Positional(name="in1.fq", description="", position=0) 71 | name = gen.choose_variable_names([arg])[0].name 72 | # name = arg.variable_name([]) 73 | assert "1" in name or "one" in name 74 | assert "in" in name 75 | assert "fq" in name 76 | -------------------------------------------------------------------------------- /test/test_cli.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import tempfile 3 | import traceback 4 | from pathlib import 
Path 5 | 6 | import pyparsing 7 | import pytest 8 | from click.testing import CliRunner 9 | from packaging import version 10 | 11 | from aclimatise.cli import main 12 | from aclimatise.yaml import yaml 13 | 14 | from .util import skip_not_installed, validate_cwl, validate_janis, validate_wdl 15 | 16 | 17 | @pytest.fixture() 18 | def runner(): 19 | return CliRunner() 20 | 21 | 22 | def cli_worked(result): 23 | if result.exit_code == 0: 24 | return True 25 | else: 26 | traceback.print_exception(*result.exc_info) 27 | assert result.exit_code == 0 28 | 29 | 30 | def test_pipe_wdl(runner, htseq_help): 31 | result = runner.invoke( 32 | main, ["pipe", "htseq-count", "--format", "wdl"], input=htseq_help 33 | ) 34 | cli_worked(result) 35 | validate_wdl(result.output) 36 | 37 | 38 | def test_pipe_cwl(runner, htseq_help): 39 | result = runner.invoke( 40 | main, ["pipe", "htseq-count", "--format", "cwl"], input=htseq_help 41 | ) 42 | cli_worked(result) 43 | validate_cwl(result.output) 44 | 45 | 46 | def test_pipe_janis(runner, htseq_help): 47 | result = runner.invoke( 48 | main, ["pipe", "htseq-count", "--format", "janis"], input=htseq_help 49 | ) 50 | cli_worked(result) 51 | validate_janis(result.output) 52 | 53 | 54 | @skip_not_installed("htseq-count") 55 | def test_explore_htseq(runner, caplog): 56 | caplog.set_level(100000) 57 | with tempfile.TemporaryDirectory() as tempdir: 58 | result = runner.invoke(main, ["explore", "htseq-count", "--out-dir", tempdir]) 59 | cli_worked(result) 60 | assert len(list(Path(tempdir).iterdir())) == 3 61 | 62 | 63 | @skip_not_installed("ls") 64 | @skip_not_installed("man") 65 | def test_explore_ls_man(runner, caplog): 66 | caplog.set_level(100000) 67 | with tempfile.TemporaryDirectory() as tempdir: 68 | result = runner.invoke(main, ["explore", "ls", "--man", "--out-dir", tempdir]) 69 | cli_worked(result) 70 | with (Path(tempdir) / "ls.yml").open() as fp: 71 | parsed = yaml.load(fp) 72 | assert parsed.help_text.startswith("LS(1)") 73 | 74 | 75 
| @skip_not_installed("samtools") 76 | def test_explore_samtools(runner, caplog): 77 | caplog.set_level(100000) 78 | with tempfile.TemporaryDirectory() as tempdir: 79 | result = runner.invoke( 80 | main, ["explore", "samtools", "--help-flag", "--help", "--out-dir", tempdir] 81 | ) 82 | cli_worked(result) 83 | assert len(list(Path(tempdir).iterdir())) > 20 84 | 85 | 86 | @skip_not_installed("samtools") 87 | def test_explore_samtools_no_subcommands(runner, caplog): 88 | caplog.set_level(100000) 89 | with tempfile.TemporaryDirectory() as tempdir: 90 | result = runner.invoke( 91 | main, 92 | [ 93 | "explore", 94 | "samtools", 95 | "--no-subcommands", 96 | "--out-dir", 97 | tempdir, 98 | ], 99 | ) 100 | cli_worked(result) 101 | # Since we aren't looking at subcommands, there should be one file for each format 102 | assert len(list(Path(tempdir).iterdir())) >= 3 103 | 104 | 105 | @pytest.mark.skipif( 106 | version.parse(pyparsing.__version__) < version.parse("3.0.0a2"), 107 | reason="PyParsing 3.0.0+ is not installed", 108 | ) 109 | def test_grammar(runner): 110 | result = runner.invoke(main, ["grammar"]) 111 | assert result.exit_code == 0 112 | assert len(result.output) > 20 113 | -------------------------------------------------------------------------------- /test/test_convert.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pytest 6 | from WDL import parse_document 7 | 8 | from aclimatise import explore_command 9 | from aclimatise.converter.cwl import CwlGenerator 10 | from aclimatise.converter.wdl import WdlGenerator 11 | from aclimatise.model import CliArgument, Flag, SimpleFlagArg 12 | from aclimatise.yaml import yaml 13 | 14 | from .util import convert_validate, skip_not_installed 15 | 16 | # Note: the parse and explore tests run conversion tests already. 
These tests are for specific edge cases 17 | 18 | 19 | def test_premade_samtools(samtools_cmd): 20 | """ 21 | Use a command tree that was generated beforehand, to quickly detect issues relating to the conversion of command 22 | trees 23 | """ 24 | convert_validate(samtools_cmd, explore=True) 25 | 26 | 27 | def test_premade_bedtools(bedtools_cmd): 28 | """ 29 | Use a command tree that was generated beforehand, to quickly detect issues relating to the conversion of command 30 | trees 31 | """ 32 | convert_validate(bedtools_cmd, explore=True) 33 | 34 | 35 | @skip_not_installed("samtools") 36 | @skip_not_installed("samtools.pl") 37 | def test_explore_samtools_pl(yaml_converter): 38 | """ 39 | Tests that commands with a non-standard file extension include their extension in the final output, and don't 40 | override another command with the same stem 41 | """ 42 | samtools = explore_command(["samtools"], max_depth=0) 43 | samtools_pl = explore_command(["samtools.pl"], max_depth=0) 44 | with tempfile.TemporaryDirectory() as temp_dir: 45 | path = Path(temp_dir) 46 | filenames = set() 47 | for path, command in itertools.chain( 48 | yaml_converter.generate_tree(samtools, temp_dir), 49 | yaml_converter.generate_tree(samtools_pl, temp_dir), 50 | ): 51 | filenames.add(path.name) 52 | 53 | assert filenames == {"samtools.yml", "samtools.pl.yml"} 54 | 55 | 56 | def test_docker_conversion(bedtools_cmd): 57 | intersect = bedtools_cmd["intersect"] 58 | container = "quay.io/biocontainers/bedtools:2.29.2--hc088bd4_0" 59 | intersect.docker_image = container 60 | with tempfile.NamedTemporaryFile() as cwl_file: 61 | CwlGenerator().save_to_file(intersect, path=Path(cwl_file.name)) 62 | cwl_file.seek(0) 63 | parsed_cwl = yaml.load(cwl_file) 64 | assert any( 65 | [ 66 | hint["class"] == "DockerRequirement" and hint["dockerPull"] == container 67 | for hint in parsed_cwl["hints"] 68 | ] 69 | ) 70 | 71 | wdl = WdlGenerator().save_to_string(intersect) 72 | parsed_wdl = 
parse_document(wdl).tasks[0] 73 | assert parsed_wdl.runtime["docker"].literal.value == container 74 | 75 | 76 | @pytest.mark.parametrize( 77 | "flag,cwltype,wdltype", 78 | [ 79 | [ 80 | Flag( 81 | synonyms=["--some-flag"], 82 | optional=True, 83 | args=SimpleFlagArg("string"), 84 | description="", 85 | ), 86 | "string?", 87 | "String?", 88 | ], 89 | [ 90 | Flag( 91 | synonyms=["--some-flag"], 92 | optional=False, 93 | args=SimpleFlagArg("string"), 94 | description="", 95 | ), 96 | "string", 97 | "String", 98 | ], 99 | ], 100 | ) 101 | def test_types_conversion(flag: CliArgument, cwltype: str, wdltype: str): 102 | """ 103 | Test that types are being correctly translated from aCLImatise types to CWL and WDL 104 | """ 105 | assert CwlGenerator.arg_to_cwl_type(flag) == cwltype 106 | assert ( 107 | WdlGenerator.type_to_wdl(flag.get_type(), optional=flag.optional).get_string() 108 | == wdltype 109 | ) 110 | -------------------------------------------------------------------------------- /test/test_data/bedtools.txt: -------------------------------------------------------------------------------- 1 | bedtools: flexible tools for genome arithmetic and DNA sequence analysis. 2 | usage: bedtools [options] 3 | 4 | The bedtools sub-commands include: 5 | 6 | [ Genome arithmetic ] 7 | intersect Find overlapping intervals in various ways. 8 | window Find overlapping intervals within a window around an interval. 9 | closest Find the closest, potentially non-overlapping interval. 10 | coverage Compute the coverage over defined intervals. 11 | map Apply a function to a column for each overlapping interval. 12 | genomecov Compute the coverage over an entire genome. 13 | merge Combine overlapping/nearby intervals into a single interval. 14 | cluster Cluster (but don't merge) overlapping/nearby intervals. 15 | complement Extract intervals _not_ represented by an interval file. 16 | shift Adjust the position of intervals. 17 | subtract Remove intervals based on overlaps b/w two files. 
18 | slop Adjust the size of intervals. 19 | flank Create new intervals from the flanks of existing intervals. 20 | sort Order the intervals in a file. 21 | random Generate random intervals in a genome. 22 | shuffle Randomly redistrubute intervals in a genome. 23 | sample Sample random records from file using reservoir sampling. 24 | spacing Report the gap lengths between intervals in a file. 25 | annotate Annotate coverage of features from multiple files. 26 | 27 | [ Multi-way file comparisons ] 28 | multiinter Identifies common intervals among multiple interval files. 29 | unionbedg Combines coverage intervals from multiple BEDGRAPH files. 30 | 31 | [ Paired-end manipulation ] 32 | pairtobed Find pairs that overlap intervals in various ways. 33 | pairtopair Find pairs that overlap other pairs in various ways. 34 | 35 | [ Format conversion ] 36 | bamtobed Convert BAM alignments to BED (& other) formats. 37 | bedtobam Convert intervals to BAM records. 38 | bamtofastq Convert BAM records to FASTQ records. 39 | bedpetobam Convert BEDPE intervals to BAM records. 40 | bed12tobed6 Breaks BED12 intervals into discrete BED6 intervals. 41 | 42 | [ Fasta manipulation ] 43 | getfasta Use intervals to extract sequences from a FASTA file. 44 | maskfasta Use intervals to mask sequences from a FASTA file. 45 | nuc Profile the nucleotide content of intervals in a FASTA file. 46 | 47 | [ BAM focused tools ] 48 | multicov Counts coverage from multiple BAMs at specific intervals. 49 | tag Tag BAM alignments based on overlaps with interval files. 50 | 51 | [ Statistical relationships ] 52 | jaccard Calculate the Jaccard statistic b/w two sets of intervals. 53 | reldist Calculate the distribution of relative distances b/w two files. 54 | fisher Calculate Fisher statistic b/w two feature files. 55 | 56 | [ Miscellaneous tools ] 57 | overlap Computes the amount of overlap from two intervals. 58 | igv Create an IGV snapshot batch script. 
59 | links Create a HTML page of links to UCSC locations. 60 | makewindows Make interval "windows" across a genome. 61 | groupby Group by common cols. & summarize oth. cols. (~ SQL "groupBy") 62 | expand Replicate lines based on lists of values in columns. 63 | split Split a file into multiple files with equal records or base pairs. 64 | 65 | [ General help ] 66 | --help Print this help menu. 67 | --version What version of bedtools are you using?. 68 | --contact Feature requests, bugs, mailing lists, etc. 69 | 70 | -------------------------------------------------------------------------------- /test/test_data/bedtools_closest.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools closest (aka closestBed) 3 | Version: v2.26.0 4 | Summary: For each feature in A, finds the closest 5 | feature (upstream or downstream) in B. 6 | 7 | Usage: bedtools closest [OPTIONS] -a -b 8 | 9 | Options: 10 | -d In addition to the closest feature in B, 11 | report its distance to A as an extra column. 12 | - The reported distance for overlapping features will be 0. 13 | 14 | -D Like -d, report the closest feature in B, and its distance to A 15 | as an extra column. Unlike -d, use negative distances to report 16 | upstream features. 17 | The options for defining which orientation is "upstream" are: 18 | - "ref" Report distance with respect to the reference genome. 19 | B features with a lower (start, stop) are upstream 20 | - "a" Report distance with respect to A. 21 | When A is on the - strand, "upstream" means B has a 22 | higher (start,stop). 23 | - "b" Report distance with respect to B. 24 | When B is on the - strand, "upstream" means A has a 25 | higher (start,stop). 26 | 27 | -io Ignore features in B that overlap A. That is, we want close, 28 | yet not touching features only. 29 | 30 | -iu Ignore features in B that are upstream of features in A. 
31 | This option requires -D and follows its orientation 32 | rules for determining what is "upstream". 33 | 34 | -id Ignore features in B that are downstream of features in A. 35 | This option requires -D and follows its orientation 36 | rules for determining what is "downstream". 37 | 38 | -fu Choose first from features in B that are upstream of features in A. 39 | This option requires -D and follows its orientation 40 | rules for determining what is "upstream". 41 | 42 | -fd Choose first from features in B that are downstream of features in A. 43 | This option requires -D and follows its orientation 44 | rules for determining what is "downstream". 45 | 46 | -t How ties for closest feature are handled. This occurs when two 47 | features in B have exactly the same "closeness" with A. 48 | By default, all such features in B are reported. 49 | Here are all the options: 50 | - "all" Report all ties (default). 51 | - "first" Report the first tie that occurred in the B file. 52 | - "last" Report the last tie that occurred in the B file. 53 | 54 | -mdb How multiple databases are resolved. 55 | - "each" Report closest records for each database (default). 56 | - "all" Report closest records among all databases. 57 | 58 | -k Report the k closest hits. Default is 1. If tieMode = "all", 59 | - all ties will still be reported. 60 | 61 | -N Require that the query and the closest hit have different names. 62 | For BED, the 4th column is compared. 63 | 64 | -s Require same strandedness. That is, only report hits in B 65 | that overlap A on the _same_ strand. 66 | - By default, overlaps are reported without respect to strand. 67 | 68 | -S Require different strandedness. That is, only report hits in B 69 | that overlap A on the _opposite_ strand. 70 | - By default, overlaps are reported without respect to strand. 71 | 72 | -f Minimum overlap required as a fraction of A. 73 | - Default is 1E-9 (i.e., 1bp). 74 | - FLOAT (e.g. 
0.50) 75 | 76 | -F Minimum overlap required as a fraction of B. 77 | - Default is 1E-9 (i.e., 1bp). 78 | - FLOAT (e.g. 0.50) 79 | 80 | -r Require that the fraction overlap be reciprocal for A AND B. 81 | - In other words, if -f is 0.90 and -r is used, this requires 82 | that B overlap 90% of A and A _also_ overlaps 90% of B. 83 | 84 | -e Require that the minimum fraction be satisfied for A OR B. 85 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires 86 | that either 90% of A is covered OR 10% of B is covered. 87 | Without -e, both fractions would have to be satisfied. 88 | 89 | -split Treat "split" BAM or BED12 entries as distinct BED intervals. 90 | 91 | -g Provide a genome file to enforce consistent chromosome sort order 92 | across input files. Only applies when used with -sorted option. 93 | 94 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions 95 | for the same chromosome. ex. "chr1" vs "chr01". 96 | 97 | -names When using multiple databases, provide an alias for each that 98 | will appear instead of a fileId when also printing the DB record. 99 | 100 | -filenames When using multiple databases, show each complete filename 101 | instead of a fileId when also printing the DB record. 102 | 103 | -sortout When using multiple databases, sort the output DB hits 104 | for each record. 105 | 106 | -bed If using BAM input, write output as BED. 107 | 108 | -header Print the header from the A file prior to results. 109 | 110 | -nobuf Disable buffered output. Using this option will cause each line 111 | of output to be printed as it is generated, rather than saved 112 | in a buffer. This will make printing large output files 113 | noticeably slower, but can be useful in conjunction with 114 | other software tools and scripts that need to process one 115 | line of bedtools output at a time. 116 | 117 | -iobuf Specify amount of memory to use for input buffer. 118 | Takes an integer argument. 
Optional suffixes K/M/G supported. 119 | Note: currently has no effect with compressed files. 120 | 121 | Notes: 122 | Reports "none" for chrom and "-1" for all other fields when a feature 123 | is not found in B on the same chromosome as the feature in A. 124 | E.g. none -1 -1 125 | 126 | -------------------------------------------------------------------------------- /test/test_data/bedtools_coverage.txt: -------------------------------------------------------------------------------- 1 | Tool: bedtools coverage (aka coverageBed) 2 | Version: v2.26.0 3 | Summary: Returns the depth and breadth of coverage of features from B 4 | on the intervals in A. 5 | 6 | Usage: bedtools coverage [OPTIONS] -a -b 7 | 8 | Options: 9 | -hist Report a histogram of coverage for each feature in A 10 | as well as a summary histogram for _all_ features in A. 11 | 12 | Output (tab delimited) after each feature in A: 13 | 1) depth 14 | 2) # bases at depth 15 | 3) size of A 16 | 4) % of A at depth 17 | 18 | -d Report the depth at each position in each A feature. 19 | Positions reported are one based. Each position 20 | and depth follow the complete A feature. 21 | 22 | -counts Only report the count of overlaps, don't compute fraction, etc. 23 | 24 | -mean Report the mean depth of all positions in each A feature. 25 | 26 | -s Require same strandedness. That is, only report hits in B 27 | that overlap A on the _same_ strand. 28 | - By default, overlaps are reported without respect to strand. 29 | 30 | -S Require different strandedness. That is, only report hits in B 31 | that overlap A on the _opposite_ strand. 32 | - By default, overlaps are reported without respect to strand. 33 | 34 | -f Minimum overlap required as a fraction of A. 35 | - Default is 1E-9 (i.e., 1bp). 36 | - FLOAT (e.g. 0.50) 37 | 38 | -F Minimum overlap required as a fraction of B. 39 | - Default is 1E-9 (i.e., 1bp). 40 | - FLOAT (e.g. 0.50) 41 | 42 | -r Require that the fraction overlap be reciprocal for A AND B. 
43 | - In other words, if -f is 0.90 and -r is used, this requires 44 | that B overlap 90% of A and A _also_ overlaps 90% of B. 45 | 46 | -e Require that the minimum fraction be satisfied for A OR B. 47 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires 48 | that either 90% of A is covered OR 10% of B is covered. 49 | Without -e, both fractions would have to be satisfied. 50 | 51 | -split Treat "split" BAM or BED12 entries as distinct BED intervals. 52 | 53 | -g Provide a genome file to enforce consistent chromosome sort order 54 | across input files. Only applies when used with -sorted option. 55 | 56 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions 57 | for the same chromosome. ex. "chr1" vs "chr01". 58 | 59 | -sorted Use the "chromsweep" algorithm for sorted (-k1,1 -k2,2n) input. 60 | 61 | -bed If using BAM input, write output as BED. 62 | 63 | -header Print the header from the A file prior to results. 64 | 65 | -nobuf Disable buffered output. Using this option will cause each line 66 | of output to be printed as it is generated, rather than saved 67 | in a buffer. This will make printing large output files 68 | noticeably slower, but can be useful in conjunction with 69 | other software tools and scripts that need to process one 70 | line of bedtools output at a time. 71 | 72 | -iobuf Specify amount of memory to use for input buffer. 73 | Takes an integer argument. Optional suffixes K/M/G supported. 74 | Note: currently has no effect with compressed files. 75 | 76 | Default Output: 77 | After each entry in A, reports: 78 | 1) The number of features in B that overlapped the A interval. 79 | 2) The number of bases in A that had non-zero coverage. 80 | 3) The length of the entry in A. 81 | 4) The fraction of bases in A that had non-zero coverage. 
82 | 83 | -------------------------------------------------------------------------------- /test/test_data/bedtools_multiinter.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools multiinter (aka multiIntersectBed) 3 | Version: v2.26.0 4 | Summary: Identifies common intervals among multiple 5 | BED/GFF/VCF files. 6 | 7 | Usage: bedtools multiinter [OPTIONS] -i FILE1 FILE2 .. FILEn 8 | Requires that each interval file is sorted by chrom/start. 9 | 10 | Options: 11 | -cluster Invoke Ryan Layers's clustering algorithm. 12 | 13 | -header Print a header line. 14 | (chrom/start/end + names of each file). 15 | 16 | -names A list of names (one/file) to describe each file in -i. 17 | These names will be printed in the header line. 18 | 19 | -g Use genome file to calculate empty regions. 20 | - STRING. 21 | 22 | -empty Report empty regions (i.e., start/end intervals w/o 23 | values in all files). 24 | - Requires the '-g FILE' parameter. 25 | 26 | -filler TEXT Use TEXT when representing intervals having no value. 27 | - Default is '0', but you can use 'N/A' or any text. 28 | 29 | -examples Show detailed usage examples. 30 | 31 | -------------------------------------------------------------------------------- /test/test_data/bedtools_random.txt: -------------------------------------------------------------------------------- 1 | 2 | ***** 3 | *****ERROR: Need a genome (-g) file. 4 | ***** 5 | 6 | Tool: bedtools random (aka randomBed) 7 | Version: v2.26.0 8 | Summary: Generate random intervals among a genome. 9 | 10 | Usage: bedtools random [OPTIONS] -g 11 | 12 | Options: 13 | -l The length of the intervals to generate. 14 | - Default = 100. 15 | - (INTEGER) 16 | 17 | -n The number of intervals to generate. 18 | - Default = 1,000,000. 19 | - (INTEGER) 20 | 21 | -seed Supply an integer seed for the shuffling. 22 | - By default, the seed is chosen automatically. 
23 | - (INTEGER) 24 | 25 | Notes: 26 | (1) The genome file should tab delimited and structured as follows: 27 | 28 | 29 | For example, Human (hg19): 30 | chr1 249250621 31 | chr2 243199373 32 | ... 33 | chr18_gl000207_random 4262 34 | 35 | Tips: 36 | One can use the UCSC Genome Browser's MySQL database to extract 37 | chromosome sizes. For example, H. sapiens: 38 | 39 | mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \ 40 | "select chrom, size from hg19.chromInfo" > hg19.genome 41 | 42 | -------------------------------------------------------------------------------- /test/test_data/bedtools_spacing.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools spacing 3 | Version: v2.26.0 4 | Summary: Report (last col.) the gap lengths between intervals in a file. 5 | 6 | Usage: bedtools spacing [OPTIONS] -i 7 | 8 | Notes: 9 | (1) Input must be sorted by chrom,start (sort -k1,1 -k2,2n for BED). 10 | (2) The 1st element for each chrom will have NULL distance. ("."). 11 | (3) Distance for overlapping intervaks is -1 and bookended is 0. 12 | 13 | Example: 14 | $ cat test.bed 15 | chr1 0 10 16 | chr1 10 20 17 | chr1 21 30 18 | chr1 35 45 19 | chr1 100 200 20 | 21 | $ bedtools spacing -i test.bed 22 | chr1 0 10 . 23 | chr1 10 20 0 24 | chr1 21 30 1 25 | chr1 35 45 5 26 | chr1 100 200 55 27 | 28 | -bed If using BAM input, write output as BED. 29 | 30 | -header Print the header from the A file prior to results. 31 | 32 | -nobuf Disable buffered output. Using this option will cause each line 33 | of output to be printed as it is generated, rather than saved 34 | in a buffer. This will make printing large output files 35 | noticeably slower, but can be useful in conjunction with 36 | other software tools and scripts that need to process one 37 | line of bedtools output at a time. 38 | 39 | -iobuf Specify amount of memory to use for input buffer. 40 | Takes an integer argument. Optional suffixes K/M/G supported. 
41 | Note: currently has no effect with compressed files. 42 | 43 | -------------------------------------------------------------------------------- /test/test_data/bedtools_subtract.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools subtract (aka subtractBed) 3 | Version: v2.26.0 4 | Summary: Removes the portion(s) of an interval that is overlapped 5 | by another feature(s). 6 | 7 | Usage: bedtools subtract [OPTIONS] -a -b 8 | 9 | Options: 10 | -A Remove entire feature if any overlap. That is, by default, 11 | only subtract the portion of A that overlaps B. Here, if 12 | any overlap is found (or -f amount), the entire feature is removed. 13 | 14 | -N Same as -A except when used with -f, the amount is the sum 15 | of all features (not any single feature). 16 | 17 | -wb Write the original entry in B for each overlap. 18 | - Useful for knowing _what_ A overlaps. Restricted by -f and -r. 19 | 20 | -wo Write the original A and B entries plus the number of base 21 | pairs of overlap between the two features. 22 | - Overlaps restricted by -f and -r. 23 | Only A features with overlap are reported. 24 | 25 | -s Require same strandedness. That is, only report hits in B 26 | that overlap A on the _same_ strand. 27 | - By default, overlaps are reported without respect to strand. 28 | 29 | -S Require different strandedness. That is, only report hits in B 30 | that overlap A on the _opposite_ strand. 31 | - By default, overlaps are reported without respect to strand. 32 | 33 | -f Minimum overlap required as a fraction of A. 34 | - Default is 1E-9 (i.e., 1bp). 35 | - FLOAT (e.g. 0.50) 36 | 37 | -F Minimum overlap required as a fraction of B. 38 | - Default is 1E-9 (i.e., 1bp). 39 | - FLOAT (e.g. 0.50) 40 | 41 | -r Require that the fraction overlap be reciprocal for A AND B. 42 | - In other words, if -f is 0.90 and -r is used, this requires 43 | that B overlap 90% of A and A _also_ overlaps 90% of B. 
44 | 45 | -e Require that the minimum fraction be satisfied for A OR B. 46 | - In other words, if -e is used with -f 0.90 and -F 0.10 this requires 47 | that either 90% of A is covered OR 10% of B is covered. 48 | Without -e, both fractions would have to be satisfied. 49 | 50 | -split Treat "split" BAM or BED12 entries as distinct BED intervals. 51 | 52 | -g Provide a genome file to enforce consistent chromosome sort order 53 | across input files. Only applies when used with -sorted option. 54 | 55 | -nonamecheck For sorted data, don't throw an error if the file has different naming conventions 56 | for the same chromosome. ex. "chr1" vs "chr01". 57 | 58 | -sorted Use the "chromsweep" algorithm for sorted (-k1,1 -k2,2n) input. 59 | 60 | -bed If using BAM input, write output as BED. 61 | 62 | -header Print the header from the A file prior to results. 63 | 64 | -nobuf Disable buffered output. Using this option will cause each line 65 | of output to be printed as it is generated, rather than saved 66 | in a buffer. This will make printing large output files 67 | noticeably slower, but can be useful in conjunction with 68 | other software tools and scripts that need to process one 69 | line of bedtools output at a time. 70 | 71 | -iobuf Specify amount of memory to use for input buffer. 72 | Takes an integer argument. Optional suffixes K/M/G supported. 73 | Note: currently has no effect with compressed files. 74 | 75 | -------------------------------------------------------------------------------- /test/test_data/bedtools_window.txt: -------------------------------------------------------------------------------- 1 | 2 | Tool: bedtools window (aka windowBed) 3 | Version: v2.26.0 4 | Summary: Examines a "window" around each feature in A and 5 | reports all features in B that overlap the window. For each 6 | overlap the entire entry in A and B are reported. 7 | 8 | Usage: bedtools window [OPTIONS] -a -b 9 | 10 | Options: 11 | -abam The A input file is in BAM format. 
Output will be BAM as well. Replaces -a. 12 | 13 | -ubam Write uncompressed BAM output. Default writes compressed BAM. 14 | 15 | -bed When using BAM input (-abam), write output as BED. The default 16 | is to write output in BAM when using -abam. 17 | 18 | -w Base pairs added upstream and downstream of each entry 19 | in A when searching for overlaps in B. 20 | - Creates symterical "windows" around A. 21 | - Default is 1000 bp. 22 | - (INTEGER) 23 | 24 | -l Base pairs added upstream (left of) of each entry 25 | in A when searching for overlaps in B. 26 | - Allows one to define assymterical "windows". 27 | - Default is 1000 bp. 28 | - (INTEGER) 29 | 30 | -r Base pairs added downstream (right of) of each entry 31 | in A when searching for overlaps in B. 32 | - Allows one to define assymterical "windows". 33 | - Default is 1000 bp. 34 | - (INTEGER) 35 | 36 | -sw Define -l and -r based on strand. For example if used, -l 500 37 | for a negative-stranded feature will add 500 bp downstream. 38 | - Default = disabled. 39 | 40 | -sm Only report hits in B that overlap A on the _same_ strand. 41 | - By default, overlaps are reported without respect to strand. 42 | 43 | -Sm Only report hits in B that overlap A on the _opposite_ strand. 44 | - By default, overlaps are reported without respect to strand. 45 | 46 | -u Write the original A entry _once_ if _any_ overlaps found in B. 47 | - In other words, just report the fact >=1 hit was found. 48 | 49 | -c For each entry in A, report the number of overlaps with B. 50 | - Reports 0 for A entries that have no overlap with B. 51 | - Overlaps restricted by -f. 52 | 53 | -v Only report those entries in A that have _no overlaps_ with B. 54 | - Similar to "grep -v." 55 | 56 | -header Print the header from the A file prior to results. 
57 | 58 | -------------------------------------------------------------------------------- /test/test_data/bowtie2_build.txt: -------------------------------------------------------------------------------- 1 | Bowtie 2 version 2.3.5.1 by Ben Langmead (langmea@cs.jhu.edu, www.cs.jhu.edu/~langmea) 2 | Usage: bowtie2-build [options]* 3 | reference_in comma-separated list of files with ref sequences 4 | bt2_index_base write bt2 data to files with this dir/basename 5 | *** Bowtie 2 indexes work only with v2 (not v1). Likewise for v1 indexes. *** 6 | Options: 7 | -f reference files are Fasta (default) 8 | -c reference sequences given on cmd line (as 9 | ) 10 | --large-index force generated index to be 'large', even if ref 11 | has fewer than 4 billion nucleotides 12 | --debug use the debug binary; slower, assertions enabled 13 | --sanitized use sanitized binary; slower, uses ASan and/or UBSan 14 | --verbose log the issued command 15 | -a/--noauto disable automatic -p/--bmax/--dcv memory-fitting 16 | -p/--packed use packed strings internally; slower, less memory 17 | --bmax max bucket sz for blockwise suffix-array builder 18 | --bmaxdivn max bucket sz as divisor of ref len (default: 4) 19 | --dcv diff-cover period for blockwise (default: 1024) 20 | --nodc disable diff-cover (algorithm becomes quadratic) 21 | -r/--noref don't build .3/.4 index files 22 | -3/--justref just build .3/.4 index files 23 | -o/--offrate SA is sampled every 2^ BWT chars (default: 5) 24 | -t/--ftabchars # of chars consumed in initial lookup (default: 10) 25 | --threads # of threads 26 | --seed seed for random number generator 27 | -q/--quiet verbose output (for debugging) 28 | -h/--help print detailed description of tool and its options 29 | --usage print this usage message 30 | --version print version information and quit 31 | -------------------------------------------------------------------------------- /test/test_data/bwa.txt: 
-------------------------------------------------------------------------------- 1 | 2 | Program: bwa (alignment via Burrows-Wheeler transformation) 3 | Version: 0.7.17-r1188 4 | Contact: Heng Li 5 | 6 | Usage: bwa [options] 7 | 8 | Command: index index sequences in the FASTA format 9 | mem BWA-MEM algorithm 10 | fastmap identify super-maximal exact matches 11 | pemerge merge overlapping paired ends (EXPERIMENTAL) 12 | aln gapped/ungapped alignment 13 | samse generate alignment (single ended) 14 | sampe generate alignment (paired ended) 15 | bwasw BWA-SW for long queries 16 | 17 | shm manage indices in shared memory 18 | fa2pac convert FASTA to PAC format 19 | pac2bwt generate BWT from PAC 20 | pac2bwtgen alternative algorithm for generating BWT 21 | bwtupdate update .bwt to the new format 22 | bwt2sa generate SA from BWT and Occ 23 | 24 | Note: To use BWA, you need to first index the genome with `bwa index'. 25 | There are three alignment algorithms in BWA: `mem', `bwasw', and 26 | `aln/samse/sampe'. If you are not sure which to use, try `bwa mem' 27 | first. Please `man ./bwa.1' for the manual. 
28 | 29 | -------------------------------------------------------------------------------- /test/test_data/bwa_bwt2sa.txt: -------------------------------------------------------------------------------- 1 | Usage: bwa bwt2sa [-i 32] 2 | -------------------------------------------------------------------------------- /test/test_data/bwa_bwtupdate.txt: -------------------------------------------------------------------------------- 1 | Usage: bwa bwtupdate 2 | -------------------------------------------------------------------------------- /test/test_data/bwa_index.txt: -------------------------------------------------------------------------------- 1 | 2 | Usage: bwa index [options] 3 | 4 | Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto] 5 | -p STR prefix of the index [same as fasta name] 6 | -b INT block size for the bwtsw algorithm (effective with -a bwtsw) [10000000] 7 | -6 index files named as .64.* instead of .* 8 | 9 | Warning: `-a bwtsw' does not work for short genomes, while `-a is' and 10 | `-a div' do not work not for long genomes. 
11 | 12 | -------------------------------------------------------------------------------- /test/test_data/bwa_mem.txt: -------------------------------------------------------------------------------- 1 | 2 | Usage: bwa mem [options] [in2.fq] 3 | 4 | Algorithm options: 5 | 6 | -t INT number of threads [1] 7 | -k INT minimum seed length [19] 8 | -w INT band width for banded alignment [100] 9 | -d INT off-diagonal X-dropoff [100] 10 | -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [1.5] 11 | -y INT seed occurrence for the 3rd round seeding [20] 12 | -c INT skip seeds with more than INT occurrences [500] 13 | -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [0.50] 14 | -W INT discard a chain if seeded bases shorter than INT [0] 15 | -m INT perform at most INT rounds of mate rescues for each read [50] 16 | -S skip mate rescue 17 | -P skip pairing; mate rescue performed unless -S also in use 18 | 19 | Scoring options: 20 | 21 | -A INT score for a sequence match, which scales options -TdBOELU unless overridden [1] 22 | -B INT penalty for a mismatch [4] 23 | -O INT[,INT] gap open penalties for deletions and insertions [6,6] 24 | -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [1,1] 25 | -L INT[,INT] penalty for 5'- and 3'-end clipping [5,5] 26 | -U INT penalty for an unpaired read pair [17] 27 | 28 | -x STR read type. 
Setting -x changes multiple parameters unless overridden [null] 29 | pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0 (PacBio reads to ref) 30 | ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0 (Oxford Nanopore 2D-reads to ref) 31 | intractg: -B9 -O16 -L5 (intra-species contigs to ref) 32 | 33 | Input/output options: 34 | 35 | -p smart pairing (ignoring in2.fq) 36 | -R STR read group header line such as '@RG\tID:foo\tSM:bar' [null] 37 | -H STR/FILE insert STR to header if it starts with @; or insert lines in FILE [null] 38 | -o FILE sam file to output results to [stdout] 39 | -j treat ALT contigs as part of the primary assembly (i.e. ignore .alt file) 40 | -5 for split alignment, take the alignment with the smallest coordinate as primary 41 | -q don't modify mapQ of supplementary alignments 42 | -K INT process INT input bases in each batch regardless of nThreads (for reproducibility) [] 43 | 44 | -v INT verbosity level: 1=error, 2=warning, 3=message, 4+=debugging [3] 45 | -T INT minimum score to output [30] 46 | -h INT[,INT] if there are 80% of the max score, output all in XA [5,200] 47 | -a output all alignments for SE or unpaired PE 48 | -C append FASTA/FASTQ comment to SAM output 49 | -V output the reference FASTA header in the XR tag 50 | -Y use soft clipping for supplementary alignments 51 | -M mark shorter split hits as secondary 52 | 53 | -I FLOAT[,FLOAT[,INT[,INT]]] 54 | specify the mean, standard deviation (10% of the mean if absent), max 55 | (4 sigma from the mean if absent) and min of the insert size distribution. 56 | FR orientation only. [inferred] 57 | 58 | Note: Please read the man page for detailed description of the command line and options. 
59 | 60 | -------------------------------------------------------------------------------- /test/test_data/dinosaur.txt: -------------------------------------------------------------------------------- 1 | usage: 2 | > java -jar Dinosaur-1.1.3.jar [OPTIONS] mzML 3 | OPTIONS: 4 | PARAMETER DEFAULT DESCRIPTION 5 | advHelp false set to output adv param file help and quit 6 | advParams path to adv param file 7 | concurrency 2 the number of assays to analyze in parallel 8 | force false ignore missing mzML params 9 | maxCharge 6 max searched ion charge 10 | minCharge 1 min searched ion charge 11 | mode global analysis mode: global or target. Global mode reports all isotope patterns, targeted only those matching targets. 12 | mzML - The shotgun MzML file to analyze 13 | nReport 10 number of random assay to export control figure for 14 | outDir output directory (by default same as input mzML) 15 | outName basename for output files (by default same as input mzML) 16 | profiling false set to enable CPU profiling 17 | reportDeisoMzHeight 15.0 mz range in deisotoper reports 18 | reportHighRes false generate high-resolution plot trail when supported (for print) 19 | reportSeed -1 seed to use for report assay selection (<0 means random) 20 | reportTargets false set to create a special report figure for each target 21 | targetPreference rt if multiple isotope patterns fit target, take the closest rt apex (rt) or the most intense (intensity) 22 | targets path to isotope patterns target file (not used by default) 23 | verbose false increase details in output 24 | writeBinary false set to output binary MSFeatureProtocol file 25 | writeHills false set to output csv file with all hills assigned to isotope patterns 26 | writeMsInspect false set to output MsInspect feature csv file 27 | writeQuantML false set to output mzQuantML file 28 | zipQcFolder false set to zip the entire qc folder on algorithm completion 29 | 30 | Not enough arguments! 
31 | -------------------------------------------------------------------------------- /test/test_data/gth.txt: -------------------------------------------------------------------------------- 1 | Usage: gth [option ...] -genomic file [...] -cdna file [...] -protein file [...] 2 | Compute similarity-based gene structure predictions (spliced alignments) 3 | using cDNA/EST and/or protein sequences and assemble the resulting spliced 4 | alignments to consensus spliced alignments. 5 | 6 | -genomic specify input files containing genomic sequences 7 | mandatory option 8 | -cdna specify input files containing cDNA/EST sequences 9 | -protein specify input files containing protein sequences 10 | -species specify species to select splice site model which is most 11 | appropriate; possible species: 12 | "human" 13 | "mouse" 14 | "rat" 15 | "chicken" 16 | "drosophila" 17 | "nematode" 18 | "fission_yeast" 19 | "aspergillus" 20 | "arabidopsis" 21 | "maize" 22 | "rice" 23 | "medicago" 24 | default: undefined 25 | -bssm read bssm parameter from file in the path given by the 26 | environment variable BSSMDIR 27 | default: undefined 28 | -scorematrix read amino acid substitution scoring matrix from file in the 29 | path given by the environment variable GTHDATADIR 30 | default: BLOSUM62 31 | -translationtable set the codon translation table used for codon translation in 32 | matching, DP, and output 33 | default: 1 34 | -f analyze only forward strand of genomic sequences 35 | default: no 36 | -r analyze only reverse strand of genomic sequences 37 | default: no 38 | -cdnaforward align only forward strand of cDNAs 39 | default: no 40 | -frompos analyze genomic sequence from this position 41 | requires -topos or -width; counting from 1 on 42 | default: 0 43 | -topos analyze genomic sequence to this position 44 | requires -frompos; counting from 1 on 45 | default: 0 46 | -width analyze only this width of genomic sequence 47 | requires -frompos 48 | default: 0 49 | -v be verbose 50 | 
default: no 51 | -xmlout show output in XML format 52 | default: no 53 | -gff3out show output in GFF3 format 54 | default: no 55 | -md5ids show MD5 fingerprints as sequence IDs 56 | default: no 57 | -o redirect output to specified file 58 | default: undefined 59 | -gzip write gzip compressed output file 60 | default: no 61 | -bzip2 write bzip2 compressed output file 62 | default: no 63 | -force force writing to output file 64 | default: no 65 | -gs2out output in old GeneSeqer2 format 66 | default: no 67 | -minmatchlen specify minimum match length (cDNA matching) 68 | default: 20 69 | -seedlength specify the seed length (cDNA matching) 70 | default: 18 71 | -exdrop specify the Xdrop value for edit distance extension (cDNA 72 | matching) 73 | default: 2 74 | -prminmatchlen specify minimum match length (protein matches) 75 | default: 24 76 | -prseedlength specify seed length (protein matching) 77 | default: 10 78 | -prhdist specify Hamming distance (protein matching) 79 | default: 4 80 | -gcmaxgapwidth set the maximum gap width for global chains 81 | defines approximately the maximum intron length 82 | set to 0 to allow for unlimited length 83 | in order to avoid false-positive exons (lonely exons) at the 84 | sequence ends, it is very important to set this parameter 85 | appropriately! 86 | default: 1000000 87 | -gcmincoverage set the minimum coverage of global chains regarding to the 88 | reference sequence 89 | default: 50 90 | -paralogs compute paralogous genes (different chaining procedure) 91 | default: no 92 | -introncutout enable the intron cutout technique 93 | default: no 94 | -fastdp use jump table to increase speed of DP calculation 95 | default: no 96 | -autointroncutout set the automatic intron cutout matrix size in megabytes and 97 | enable the automatic intron cutout technique 98 | default: 0 99 | -intermediate stop after calculation of spliced alignments and output 100 | results in reusable XML format. 
Do not process this output 101 | yourself, use the ``normal'' XML output instead! 102 | default: no 103 | -first set the maximum number of spliced alignments per genomic DNA 104 | input. Set to 0 for unlimited number. 105 | default: 0 106 | -help display help for basic options and exit 107 | -help+ display help for all options and exit 108 | -version display version information and exit 109 | 110 | For detailed information, please refer to the manual of GenomeThreader. 111 | Report bugs to . 112 | -------------------------------------------------------------------------------- /test/test_data/htseq_count.txt: -------------------------------------------------------------------------------- 1 | usage: htseq-count [options] alignment_file gff_file 2 | 3 | This script takes one or more alignment files in SAM/BAM format and a feature 4 | file in GFF format and calculates for each feature the number of reads mapping 5 | to it. See http://htseq.readthedocs.io/en/master/count.html for details. 6 | 7 | positional arguments: 8 | samfilenames Path to the SAM/BAM files containing the mapped reads. 9 | If '-' is selected, read from standard input 10 | featuresfilename Path to the GTF file containing the features 11 | 12 | optional arguments: 13 | -h, --help show this help message and exit 14 | -f {sam,bam,auto}, --format {sam,bam,auto} 15 | Type of data. DEPRECATED: file format 16 | is detected automatically. This option is ignored. 17 | -r {pos,name}, --order {pos,name} 18 | 'pos' or 'name'. Sorting order of 19 | (default: name). Paired-end sequencing data must be 20 | sorted either by position or by read name, and the 21 | sorting order must be specified. Ignored for single- 22 | end data. 23 | --max-reads-in-buffer MAX_BUFFER_SIZE 24 | When is paired end sorted by 25 | position, allow only so many reads to stay in memory 26 | until the mates are found (raising this number will 27 | use more memory). 
Has no effect for single end or 28 | paired end sorted by name 29 | -s {yes,no,reverse}, --stranded {yes,no,reverse} 30 | Whether the data is from a strand-specific assay. 31 | Specify 'yes', 'no', or 'reverse' (default: yes). 32 | 'reverse' means 'yes' with reversed strand 33 | interpretation 34 | -a MINAQUAL, --minaqual MINAQUAL 35 | Skip all reads with MAPQ alignment quality lower than 36 | the given minimum value (default: 10). MAPQ is the 5th 37 | column of a SAM/BAM file and its usage depends on the 38 | software used to map the reads. 39 | -t FEATURETYPE, --type FEATURETYPE 40 | Feature type (3rd column in GTF file) to be used, all 41 | features of other type are ignored (default, suitable 42 | for Ensembl GTF files: exon) 43 | -i IDATTR, --idattr IDATTR 44 | GTF attribute to be used as feature ID (default, 45 | suitable for Ensembl GTF files: gene_id). All feature 46 | of the right type (see -t option) within the same GTF 47 | attribute will be added together. The typical way of 48 | using this option is to count all exonic reads from 49 | each gene and add the exons but other uses are 50 | possible as well. 51 | --additional-attr ADDITIONAL_ATTR 52 | Additional feature attributes (default: none, suitable 53 | for Ensembl GTF files: gene_name). Use multiple times 54 | for more than one additional attribute. These 55 | attributes are only used as annotations in the output, 56 | while the determination of how the counts are added 57 | together is done based on option -i. 
58 | -m {union,intersection-strict,intersection-nonempty}, --mode {union,intersection-strict,intersection-nonempty} 59 | Mode to handle reads overlapping more than one feature 60 | (choices: union, intersection-strict, intersection- 61 | nonempty; default: union) 62 | --nonunique {none,all,fraction,random} 63 | Whether and how to score reads that are not uniquely 64 | aligned or ambiguously assigned to features (choices: 65 | none, all, fraction, random; default: none) 66 | --secondary-alignments {score,ignore} 67 | Whether to score secondary alignments (0x100 flag) 68 | --supplementary-alignments {score,ignore} 69 | Whether to score supplementary alignments (0x800 flag) 70 | -o SAMOUTS, --samout SAMOUTS 71 | Write out all SAM alignment records into SAM/BAM files 72 | (one per input file needed), annotating each line with 73 | its feature assignment (as an optional field with tag 74 | 'XF'). See the -p option to use BAM instead of SAM. 75 | -p {SAM,BAM,sam,bam}, --samout-format {SAM,BAM,sam,bam} 76 | Format to use with the --samout option. 77 | -d OUTPUT_DELIMITER, --delimiter OUTPUT_DELIMITER 78 | Column delimiter in output (default: TAB). 79 | -c OUTPUT_FILENAME, --counts_output OUTPUT_FILENAME 80 | Filename to output the counts to instead of stdout. 81 | --append-output Append counts output. This option is useful if you 82 | have already creates a TSV/CSV/similar file with a 83 | header for your samples (with additional columns for 84 | the feature name and any additionl attributes) and 85 | want to fill in the rest of the file. 86 | -n NPROCESSES, --nprocesses NPROCESSES 87 | Number of parallel CPU processes to use (default: 1). 88 | --feature-query FEATURE_QUERY 89 | Restrict to features descibed in this expression. 90 | Currently supports a single kind of expression: 91 | attribute == "one attr" to restrict the GFF to a 92 | single gene or transcript, e.g. 
--feature-query 93 | 'gene_name == "ACTB"' - notice the single quotes 94 | around the argument of this option and the double 95 | quotes around the gene name. Broader queries might 96 | become available in the future. 97 | -q, --quiet Suppress progress report 98 | --version Show software version and exit 99 | 100 | Written by Simon Anders (sanders@fs.tum.de), European Molecular Biology 101 | Laboratory (EMBL) and Fabio Zanini (fabio.zanini@unsw.edu.au), UNSW Sydney. 102 | (c) 2010-2020. Released under the terms of the GNU General Public License v3. 103 | Part of the 'HTSeq' framework, version 0.12.4. 104 | -------------------------------------------------------------------------------- /test/test_data/mauve.txt: -------------------------------------------------------------------------------- 1 | Usage: 2 | mauveAligner [options] ... 3 | Options: 4 | --output= Output file name. Prints to screen by default 5 | --mums Find MUMs only, do not attempt to determine locally collinear blocks (LCBs) 6 | --no-recursion Don't perform recursive anchor identification (implies --no-gapped-alignment) 7 | --no-lcb-extension If determining LCBs, don't attempt to extend the LCBs 8 | --seed-size= Initial seed match size, default is log_2( average seq. length ) 9 | --max-extension-iterations= Limit LCB extensions to this number of attempts, default is 4 10 | --eliminate-inclusions Eliminate linked inclusions in subset matches. 11 | --weight= Minimum LCB weight in base pairs per sequence 12 | --match-input= Use specified match file instead of searching for matches 13 | --lcb-match-input Indicates that the match input file contains matches that have been clustered into LCBs 14 | --lcb-input= Use specified lcb file instead of constructing LCBs (skips LCB generation) 15 | --scratch-path= For large genomes, use a directory for storage of temporary data. Should be given two or more times to with different paths. 
16 | --id-matrix= Generate LCB stats and write them to the specified file 17 | --island-size= Find islands larger than the given number 18 | --island-output= Output islands the given file (requires --island-size) 19 | --backbone-size= Find stretches of backbone longer than the given number of b.p. 20 | --max-backbone-gap= Allow backbone to be interrupted by gaps up to this length in b.p. 21 | --backbone-output= Output islands the given file (requires --island-size) 22 | --coverage-output= Output a coverage list to the specified file (- for stdout) 23 | --repeats Generates a repeat map. Only one sequence can be specified 24 | --output-guide-tree= Write out a guide tree to the designated file 25 | --collinear Assume that input sequences are collinear--they have no rearrangements 26 | 27 | Gapped alignment controls: 28 | --no-gapped-alignment Don't perform a gapped alignment 29 | --max-gapped-aligner-length= Maximum number of base pairs to attempt aligning with the gapped aligner 30 | --min-recursive-gap-length= Minimum size of gaps that Mauve will perform recursive MUM anchoring on (Default is 200) 31 | 32 | Signed permutation matrix options: 33 | --permutation-matrix-output= Write out the LCBs as a signed permutation matrix to the given file 34 | --permutation-matrix-min-weight= A permutation matrix will be written for every set of LCBs with weight between this value and the value of --weight 35 | 36 | Alignment output options: 37 | --alignment-output-dir= Outputs a set of alignment files (one per LCB) to a given directory 38 | --alignment-output-format= Selects the output format for --alignment-output-dir 39 | --output-alignment= Write out an XMFA format alignment to the designated file 40 | 41 | Supported alignment output formats are: phylip, clustal, msf, nexus, mega, codon 42 | 43 | -------------------------------------------------------------------------------- /test/test_data/podchecker.txt: 
-------------------------------------------------------------------------------- 1 | Usage: 2 | podchecker [-help] [-man] [-(no)warnings] [file ...] 3 | 4 | Options and Arguments: 5 | -help Print a brief help message and exit. 6 | 7 | -man Print the manual page and exit. 8 | 9 | -warnings -nowarnings 10 | Turn on/off printing of warnings. Repeating -warnings increases 11 | the warning level, i.e. more warnings are printed. Currently 12 | increasing to level two causes flagging of unescaped "<,>" 13 | characters. 14 | 15 | file The pathname of a POD file to syntax-check (defaults to standard 16 | input). 17 | -------------------------------------------------------------------------------- /test/test_data/samtools.txt: -------------------------------------------------------------------------------- 1 | 2 | Program: samtools (Tools for alignments in the SAM format) 3 | Version: 1.9 (using htslib 1.9) 4 | 5 | Usage: samtools [options] 6 | 7 | Commands: 8 | -- Indexing 9 | dict create a sequence dictionary file 10 | faidx index/extract FASTA 11 | fqidx index/extract FASTQ 12 | index index alignment 13 | 14 | -- Editing 15 | calmd recalculate MD/NM tags and '=' bases 16 | fixmate fix mate information 17 | reheader replace BAM header 18 | targetcut cut fosmid regions (for fosmid pool only) 19 | addreplacerg adds or replaces RG tags 20 | markdup mark duplicates 21 | 22 | -- File operations 23 | collate shuffle and group alignments by name 24 | cat concatenate BAMs 25 | merge merge sorted alignments 26 | mpileup multi-way pileup 27 | sort sort alignment file 28 | split splits a file by read group 29 | quickcheck quickly check if SAM/BAM/CRAM file appears intact 30 | fastq converts a BAM to a FASTQ 31 | fasta converts a BAM to a FASTA 32 | 33 | -- Statistics 34 | bedcov read depth per BED region 35 | depth compute the depth 36 | flagstat simple stats 37 | idxstats BAM index stats 38 | phase phase heterozygotes 39 | stats generate stats (former bamcheck) 40 | 41 | -- Viewing 
42 | flags explain BAM flags 43 | tview text alignment viewer 44 | view SAM<->BAM<->CRAM conversion 45 | depad convert padded BAM to unpadded BAM 46 | 47 | -------------------------------------------------------------------------------- /test/test_data/samtools_bedcov.txt: -------------------------------------------------------------------------------- 1 | Usage: samtools bedcov [options] [...] 2 | 3 | Options: 4 | -Q mapping quality threshold [0] 5 | -j do not include deletions (D) and ref skips (N) in bedcov computation 6 | --input-fmt-option OPT[=VAL] 7 | Specify a single input file format option in the form 8 | of OPTION or OPTION=VALUE 9 | --reference FILE 10 | Reference sequence FASTA FILE [null] 11 | -------------------------------------------------------------------------------- /test/test_data/samtools_dict.txt: -------------------------------------------------------------------------------- 1 | About: Create a sequence dictionary file from a fasta file 2 | Usage: samtools dict [options] 3 | 4 | Options: -a, --assembly STR assembly 5 | -H, --no-header do not print @HD line 6 | -o, --output STR file to write out dict file [stdout] 7 | -s, --species STR species 8 | -u, --uri STR URI [file:///abs/path/to/file.fa] 9 | 10 | -------------------------------------------------------------------------------- /test/test_data/samtools_pl.txt: -------------------------------------------------------------------------------- 1 | Program: samtools.pl (helper script for SAMtools) 2 | Version: 0.3.3 3 | Contact: Heng Li 4 | 5 | Usage: samtools.pl [] 6 | 7 | Command: varFilter filtering SNPs and short indels 8 | pileup2fq generate fastq from `pileup -c' 9 | showALEN print alignment length (ALEN) following CIGAR 10 | 11 | -------------------------------------------------------------------------------- /test/test_data/samtools_quickcheck.txt: -------------------------------------------------------------------------------- 1 | Usage: samtools quickcheck [options] [...] 
2 | Options: 3 | -v verbose output (repeat for more verbosity) 4 | -q suppress warning messages 5 | 6 | Notes: 7 | 8 | 1. By default quickcheck will emit a warning message if and only if a file 9 | fails the checks, in which case the exit status is non-zero. Under normal 10 | behaviour with valid data it will be silent and has a zero exit status. 11 | The warning messages are purely for manual inspection and should not be 12 | parsed by scripts. 13 | 14 | 2. In order to use this command programmatically, you should check its exit 15 | status. One way to use quickcheck might be as a check that all BAM files in 16 | a directory are okay: 17 | 18 | samtools quickcheck *.bam && echo 'all ok' \ 19 | || echo 'fail!' 20 | 21 | The first level of verbosity lists only files that fail to stdout. 22 | To obtain a parsable list of files that have failed, use this option: 23 | 24 | samtools quickcheck -qv *.bam > bad_bams.fofn \ 25 | && echo 'all ok' \ 26 | || echo 'some files failed check, see bad_bams.fofn' 27 | -------------------------------------------------------------------------------- /test/test_explore_e2e.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | from unittest.mock import Mock, patch 5 | 6 | import pytest 7 | 8 | from aclimatise import explore_command 9 | from aclimatise.model import Command, Positional 10 | 11 | from .util import ( 12 | HelpText, 13 | all_ids, 14 | all_tests, 15 | convert_validate, 16 | ensure_conda, 17 | skip_not_installed, 18 | ) 19 | 20 | 21 | @pytest.mark.parametrize("test", all_tests, ids=all_ids) 22 | def test_explore(test: HelpText): 23 | """ 24 | A comprehensive end-to-end test that tests the parser and converters, after exploring a given command 25 | """ 26 | if not shutil.which(test.cmd[0]): 27 | pytest.skip("{} is not installed".format(test.cmd[0])) 28 | 29 | try: 30 | ensure_conda() 31 | except: 32 | pytest.skip("Not in a conda 
environment") 33 | 34 | # For speed's sake, only explore to depth 2 35 | command = explore_command(test.cmd, max_depth=1) 36 | 37 | # Check we parsed correctly 38 | test.run_assertions(command, explore=True) 39 | 40 | 41 | @skip_not_installed("dinosaur") 42 | @pytest.mark.timeout(360) 43 | def test_explore_dinosaur(): 44 | """ 45 | Python has an issue with killing process trees, whereby the subprocess runs another subprocess. 46 | This tests that dinosaur 47 | :return: 48 | """ 49 | command = explore_command(["dinosaur"], max_depth=1) 50 | 51 | 52 | @pytest.mark.skipif(not shutil.which("bwa"), reason="bwa is not installed") 53 | def test_explore_bwa(): 54 | """ 55 | This tests specifically that exploring bwa yields a proper bwa mem 56 | """ 57 | command = explore_command(["bwa"], max_depth=1) 58 | 59 | # Check that we parsed bwa mem correctly 60 | mem = [cmd for cmd in command.subcommands if cmd.command[1] == "mem"][0] 61 | assert len(mem.positional) == 3 62 | assert len(mem.subcommands) == 0 63 | assert len(mem.named) >= 30 64 | 65 | 66 | def test_repeat_positionals(): 67 | """ 68 | Test that, if we have multiple duplicate positionals, only the first is tested 69 | """ 70 | parent = Command( 71 | command=[], 72 | positional=[ 73 | Positional(name="a", description="", position=i) for i in range(10) 74 | ], 75 | ) 76 | child = Command(command=[]) 77 | 78 | count = 0 79 | 80 | def mock_convert(*args, **kwargs): 81 | nonlocal count 82 | if count == 0: 83 | count += 1 84 | return parent 85 | return child 86 | 87 | # with patch("aclimatise.execution.help.CliHelpExecutor.explore", new=lambda *args, **kwargs: child): 88 | with patch( 89 | "aclimatise.execution.help.CliHelpExecutor.convert", 90 | new=Mock(side_effect=mock_convert), 91 | ) as mocked: 92 | explore_command([]) 93 | 94 | # We should only call convert twice, once for the parent and once for the child, since there's only one unique positional 95 | assert mocked.call_count == 2 96 | 
-------------------------------------------------------------------------------- /test/test_model.py: -------------------------------------------------------------------------------- 1 | from aclimatise.model import Command 2 | 3 | 4 | def test_reanalyse(samtools_cmd: Command): 5 | """ 6 | Test the command.reanalyse() method 7 | """ 8 | reanalysed = samtools_cmd.reanalyse() 9 | assert reanalysed.help_text == samtools_cmd.help_text 10 | assert len(reanalysed.subcommands) == len(samtools_cmd.subcommands) 11 | 12 | re_sort = reanalysed["sort"] 13 | assert len(re_sort.positional) > 0 14 | assert len(re_sort.named) > 0 15 | -------------------------------------------------------------------------------- /test/test_parse_e2e.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | 4 | import pytest 5 | from pkg_resources import resource_filename 6 | 7 | from aclimatise.integration import parse_help 8 | 9 | from .util import ( 10 | HelpText, 11 | all_ids, 12 | all_tests, 13 | all_tests_lookup, 14 | convert_validate, 15 | validate_cwl, 16 | validate_wdl, 17 | ) 18 | 19 | 20 | @pytest.mark.parametrize("test", all_tests, ids=all_ids) 21 | def test_all(test: HelpText): 22 | """ 23 | A comprehensive end-to-end test that tests the parser and converters, using the test data files 24 | """ 25 | with open(resource_filename("test", test.path)) as fp: 26 | help_text = fp.read() 27 | 28 | cmd = parse_help(test.cmd, help_text) 29 | 30 | # Check that the help text is included in the command 31 | assert cmd.help_text == help_text 32 | 33 | test.run_assertions(cmd, explore=False) 34 | 35 | 36 | @pytest.mark.timeout(20) 37 | def test_long_text(): 38 | """ 39 | This tests the case where the parse function is handed an inordinate amount of text. 
In this case, we shouldn't 40 | bother parsing, and just return an empty command 41 | """ 42 | text = "\n".join( 43 | [ 44 | "".join( 45 | random.choices( 46 | string.ascii_letters + " ", 47 | weights=[1] * len(string.ascii_letters) + [5], 48 | k=100, 49 | ) 50 | ) 51 | for i in range(2000) 52 | ] 53 | ) 54 | command = parse_help(["some", "command"], text=text) 55 | assert len(command.positional) == 0 56 | assert len(command.named) == 0 57 | -------------------------------------------------------------------------------- /test/test_type_inference.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from aclimatise.cli_types import ( 4 | CliBoolean, 5 | CliDir, 6 | CliFile, 7 | CliFloat, 8 | CliInteger, 9 | CliString, 10 | CliType, 11 | ) 12 | from aclimatise.model import CliArgument, EmptyFlagArg, Flag, SimpleFlagArg, infer_type 13 | 14 | 15 | @pytest.mark.parametrize( 16 | "string,typ", 17 | [ 18 | ("", None), 19 | ("int", CliInteger()), 20 | ("size", CliInteger()), 21 | ("length", CliInteger()), 22 | ("max", CliInteger()), 23 | ("min", CliInteger()), 24 | ("str", CliString()), 25 | ("float", CliFloat()), 26 | ("decimal", CliFloat()), 27 | ("bool", CliBoolean()), 28 | ("file", CliFile()), 29 | ("path", CliFile()), 30 | ("input file", CliFile(output=False)), 31 | ("output file", CliFile(output=True)), 32 | ("folder", CliDir()), 33 | ("directory", CliDir()), 34 | ("output directory", CliDir(output=True)), 35 | ("blah 23 blub", CliInteger()), 36 | ("nonsense 23.42", CliFloat()), 37 | (".42 gibberish", CliFloat()), 38 | ("1E-5", CliFloat()), 39 | ("BOOL Output strand bias files, 'true' or 'false'", CliBoolean()), 40 | ("file to write out dict file [stdout]", CliFile(output=True)), 41 | ("Filename to output the counts to instead of stdout.", CliFile(output=True)), 42 | pytest.param( 43 | "Write out all SAM alignment records into SAM/BAM files (one per input file needed), annotating each line with its feature 
assignment (as an optional field with tag 'XF'). See the -p option to use BAM instead of SAM.", 44 | CliFile(output=True), 45 | marks=pytest.mark.xfail( 46 | reason="This description doesn't make it clear that it wants an output file. I'm not sure how this could ever be parsed" 47 | ), 48 | ), 49 | ], 50 | ) 51 | def test_type_inference(string, typ): 52 | inferred_type = infer_type(string) 53 | assert inferred_type == typ 54 | 55 | 56 | @pytest.mark.parametrize( 57 | "flag,typ", 58 | [ 59 | [ 60 | Flag( 61 | description="Filename to output the counts to instead of stdout.", 62 | synonyms=["-c", "--counts_output"], 63 | args=SimpleFlagArg("OUTPUT_FILENAME"), 64 | ), 65 | CliFile(output=True), 66 | ], 67 | pytest.param( 68 | Flag( 69 | description="redirect output to specified file\ndefault: undefined", 70 | synonyms=["-o"], 71 | args=EmptyFlagArg(), 72 | ), 73 | CliFile(output=True), 74 | marks=pytest.mark.xfail( 75 | reason="Because the help doesn't indicate an argument, we can't know that this is an output file" 76 | ), 77 | ), 78 | ], 79 | ) 80 | def test_flag_type_inference(flag: CliArgument, typ: CliType): 81 | inferred_type = flag.get_type() 82 | assert inferred_type == typ 83 | -------------------------------------------------------------------------------- /test/test_yaml_dump.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | 3 | from aclimatise.integration import parse_help 4 | from aclimatise.yaml import yaml 5 | 6 | 7 | def test_round_trip(bwamem_help): 8 | command = parse_help(["bwa", "mem"], bwamem_help) 9 | 10 | # Dump 11 | buffer = StringIO() 12 | yaml.dump(command, buffer) 13 | 14 | # Load 15 | buffer.seek(0) 16 | output = yaml.load(buffer) 17 | 18 | # Assert the round trip worked 19 | assert command == output 20 | -------------------------------------------------------------------------------- /test/usage/test_usage.py: 
-------------------------------------------------------------------------------- 1 | from itertools import chain 2 | 3 | import pytest 4 | 5 | from aclimatise.flag_parser.elements import arg_expression, flag_with_arg, list_type_arg 6 | from aclimatise.model import Flag, RepeatFlagArg, SimpleFlagArg 7 | from aclimatise.usage_parser.elements import ( # short_flag_list, 8 | stack, 9 | usage, 10 | usage_element, 11 | ) 12 | from aclimatise.usage_parser.model import UsageElement 13 | 14 | 15 | def test_bwa(): 16 | txt = "Usage: bwa mem [options] [in2.fq]" 17 | els = usage.parseString(txt) 18 | print(els) 19 | 20 | 21 | @pytest.mark.skip( 22 | "It's impossible to distinguish between a grouped list of short flags and one long flag with a single dash" 23 | ) 24 | def test_samtools_merge_short_flags(): 25 | text = "-nurlf" 26 | els = short_flag_list.parseString(text) 27 | assert len(els) == 5 28 | assert isinstance(els[0], Flag) 29 | 30 | 31 | @pytest.mark.skip( 32 | "It's impossible to distinguish between a grouped list of short flags and one long flag with a single dash" 33 | ) 34 | def test_samtools_merge_optional_short_flags(): 35 | text = "[-nurlf]" 36 | els = usage_element.parseString(text) 37 | assert len(els) == 5 38 | assert isinstance(els[0], Flag) 39 | assert els[0].optional 40 | 41 | 42 | def test_samtools_merge_variable(): 43 | text = "" 44 | els = usage_element.parseString(text) 45 | assert len(els) == 1 46 | assert isinstance(els[0], UsageElement) 47 | assert els[0].variable 48 | 49 | 50 | def test_samtools_merge_flag_arg(): 51 | text = "-h inh.sam" 52 | els = usage_element.parseString(text) 53 | assert len(els) == 1 54 | assert isinstance(els[0], Flag) 55 | assert isinstance(els[0].args, SimpleFlagArg) 56 | 57 | 58 | def test_samtools_merge_optional_flag_arg(): 59 | text = "[-h inh.sam]" 60 | els = usage_element.setDebug().parseString(text) 61 | assert len(els) == 1 62 | assert isinstance(els[0], Flag) 63 | assert els[0].optional 64 | assert 
isinstance(els[0].args, SimpleFlagArg) 65 | 66 | 67 | def test_samtools_merge_list_args(): 68 | text = "[ ... ]" 69 | el = usage_element.parseString(text) 70 | assert len(el) == 1 71 | assert isinstance(el[0], UsageElement) 72 | assert el[0].repeatable 73 | 74 | 75 | def test_samtools_merge_full(process, usage_parser): 76 | text = process( 77 | """ 78 | Usage: samtools merge [-nurlf] [-h inh.sam] [-b ] [ ... ] 79 | """ 80 | ) 81 | command = usage_parser.parse_usage(cmd=["samtools", "merge"], usage=text) 82 | 83 | assert len(command.positional) == 3 84 | assert command.positional[0].name == "out.bam" 85 | assert command.positional[1].name == "in1.bam" 86 | 87 | assert len(command.named) == 3 88 | assert command.all_synonyms == {"-nurlf", "-h", "-b"} 89 | 90 | 91 | def test_pisces_usage(usage_parser): 92 | text = "USAGE: dotnet Pisces.dll -bam -g " 93 | command = usage_parser.parse_usage(["pisces"], text) 94 | assert len(command.named) == 2 95 | assert len(command.positional) == 0 96 | assert command.all_synonyms == {"-bam", "-g"} 97 | 98 | 99 | def test_trailing_text(process, usage_parser): 100 | """ 101 | Tests that the usage parser will not parse text after the usage section has ended 102 | """ 103 | text = process( 104 | """ 105 | usage: htseq-count [options] alignment_file gff_file 106 | 107 | This script takes one or more alignment files in SAM/BAM format and a feature 108 | file in GFF format and calculates for each feature the number of reads mapping 109 | to it. See http://htseq.readthedocs.io/en/master/count.html for details. 
110 | """ 111 | ) 112 | command = usage_parser.parse_usage(["htseq-count"], text) 113 | # We don't count either the command "htseq-count", or "[options]" as an argument, so there are only 2 positionals 114 | assert len(command.positional) == 2 115 | 116 | 117 | def test_bwt2sa(usage_parser): 118 | text = """ 119 | Usage: bwa bwt2sa [-i 32] 120 | """ 121 | 122 | command = usage_parser.parse_usage(["bwa", "bwt2sa"], text) 123 | 124 | # in and out 125 | assert len(command.positional) == 2 126 | 127 | # -i 128 | assert len(command.named) == 1 129 | 130 | 131 | def test_bedtools_multiinter_flag_arg(): 132 | text = " FILE1 FILE2 .. FILEn" 133 | arg = arg_expression.parseString(text)[0] 134 | assert isinstance(arg, RepeatFlagArg) 135 | assert arg.name == "FILEn" 136 | 137 | 138 | def test_bedtools_multiinter_flag(): 139 | text = "-i FILE1 FILE2 .. FILEn" 140 | arg = flag_with_arg.parseString(text)[0] 141 | assert isinstance(arg.argtype, RepeatFlagArg) 142 | assert arg.name == "-i" 143 | 144 | 145 | def test_bedtools_multiinter(usage_parser): 146 | text = """ 147 | Summary: Identifies common intervals among multiple 148 | BED/GFF/VCF files. 149 | 150 | Usage: bedtools multiinter [OPTIONS] -i FILE1 FILE2 .. FILEn 151 | Requires that each interval file is sorted by chrom/start. 152 | 153 | Options: 154 | -cluster Invoke Ryan Layers's clustering algorithm. 
155 | """ 156 | 157 | command = usage_parser.parse_usage(["bedtools", "multiinter"], text) 158 | 159 | assert len(command.positional) == 0 160 | assert len(command.named) == 1 161 | assert command.named[0].longest_synonym == "-i" 162 | assert isinstance(command.named[0].args, RepeatFlagArg) 163 | 164 | 165 | def test_samtools_dict(usage_parser): 166 | text = """ 167 | Usage: samtools dict [options] 168 | """ 169 | command = usage_parser.parse_usage(["samtools", "dict"], text, debug=True) 170 | assert len(command.positional) == 1 171 | 172 | 173 | def test_mid_line_usage(usage_parser): 174 | text = """ 175 | Can't open --usage: No such file or directory at /usr/bin/samtools.pl line 50. 176 | """ 177 | command = usage_parser.parse_usage(["samtools.pl", "showALEN"], text, debug=True) 178 | assert command.empty 179 | 180 | 181 | def test_usage_description_block(usage_parser): 182 | text = """ 183 | Usage: 184 | shell [options] -e string 185 | execute string in V8 186 | shell [options] file1 file2 ... filek 187 | run JavaScript scripts in file1, file2, ..., filek 188 | shell [options] 189 | shell [options] --shell [file1 file2 ... filek] 190 | run an interactive JavaScript shell 191 | d8 [options] file1 file2 ... filek 192 | d8 [options] 193 | d8 [options] --shell [file1 file2 ... filek] 194 | run the new debugging shell 195 | """ 196 | command = usage_parser.parse_usage(["typeHLA.js"], text, debug=True) 197 | 198 | positional_names = {pos.name for pos in command.positional} 199 | flag_synonyms = set(chain.from_iterable([flag.synonyms for flag in command.named])) 200 | 201 | assert "shell" in positional_names 202 | assert "filek" in positional_names 203 | assert "d8" in positional_names 204 | 205 | assert "--shell" in flag_synonyms 206 | assert "-e" in flag_synonyms 207 | --------------------------------------------------------------------------------