├── .github ├── CODEOWNERS ├── logos │ └── fulcrumgenomics.svg └── workflows │ └── pythonpackage.yml ├── .gitignore ├── LICENSE ├── README.md ├── ci ├── flake8.cfg ├── mypy.ini └── precommit.sh ├── conda-requirements-minimal.txt ├── conda-requirements-test.txt ├── pip-requirements.txt ├── setup.py └── src ├── python └── pyclient │ ├── __init__.py │ ├── core │ └── logging.py │ ├── pipeline │ ├── __init__.py │ └── snakemake_utils.py │ ├── tests │ ├── __init__.py │ ├── test_hello_world.py │ └── util.py │ └── tools │ ├── __init__.py │ ├── __main__.py │ └── hello_world.py ├── scripts ├── common.sh └── run_snakemake.sh └── snakemake └── hello_world.smk /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @clintval @tfenne @nh13 2 | -------------------------------------------------------------------------------- /.github/logos/fulcrumgenomics.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [push] 4 | env: 5 | PYTHON_VERSION: 3.9 6 | 7 | jobs: 8 | testing: 9 | runs-on: ubuntu-24.04 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@v2 13 | - name: Set up Python ${{env.PYTHON_VERSION}} 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: ${{env.PYTHON_VERSION}} 17 | - name: Set up miniconda 18 | uses: conda-incubator/setup-miniconda@v3 19 | with: 20 | miniforge-variant: Miniforge3 21 | miniforge-version: latest 22 | channels: conda-forge 23 | channel-priority: true 24 | auto-update-conda: true 25 | auto-activate-base: true 26 | python-version: ${{env.PYTHON_VERSION}} 27 | - name: Set up the conda environment 28 | shell: bash -l {0} 29 | run: | 30 | mamba create --override-channels -c bioconda -c conda-forge -n pyclient --file 
conda-requirements-minimal.txt --file conda-requirements-test.txt 31 | - name: Install pip-requirements 32 | shell: bash -l {0} 33 | run: | 34 | conda activate pyclient 35 | python -m pip install --upgrade pip 36 | pip install -r pip-requirements.txt 37 | - name: Run pre-commit testing 38 | shell: bash -l {0} 39 | run: | 40 | conda activate pyclient 41 | bash ci/precommit.sh 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PyCharm 2 | .idea 3 | 4 | # Python compiled & optimized files 5 | *.pyc 6 | *.pyo 7 | 8 | # MyPy Cache directory 9 | .mypy_cache 10 | 11 | # for develop installs 12 | *.egg-info 13 | 14 | # local dirs 15 | envs 16 | environments 17 | .conda 18 | conda-env-builder 19 | 20 | # snakemake 21 | .snakemake 22 | 23 | # VSCode dir 24 | .vscode 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2022 Fulcrum Genomics LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 | [Visit us at Fulcrum Genomics](https://www.fulcrumgenomics.com) to learn more about how we can power your Bioinformatics with python-snakemake-template and beyond. 6 | 7 |

8 |

9 | 10 | [![build](https://github.com/fulcrumgenomics/python-snakemake-skeleton/actions/workflows/pythonpackage.yml/badge.svg)](https://github.com/fulcrumgenomics/python-snakemake-skeleton/actions/workflows/pythonpackage.yml) 11 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/fulcrumgenomics/fgbio/blob/main/LICENSE) 12 | [![Language](https://img.shields.io/badge/python-3.6.10-brightgreen)](https://www.python.org/downloads/release/python-3610/) 13 | 14 | A skeleton repository for Snakemake pipeline(s) and a python command-line toolkit. 15 | 16 | ## Why this repo? 17 | 18 | This is the starting point for [Fulcrum Genomics][fulcrum-genomics-link] projects that contain Snakemake pipelines 19 | and a python toolkit. 20 | 21 | This repo contains the following, in no particular order: 22 | 23 | - a hello world snakefile in `src/snakemake/hello_world.smk` 24 | - this uses the `onerror` directive to better display rule errors, in particular the last few 25 | lines of the rule's log 26 | - a python toolkit (`client-tools`) in `src/python/pyclient` 27 | - uses `defopt` for arg parsing 28 | - has custom logging in `core/logging.py` 29 | - has utility methods to support the above `onerror` snakemake directive in `pipeline/snakemake_utils.py` 30 | - has a unit test to ensure the above snakefile is runnable and generally executes the expected rules in `tests/test_hello_world.py`. 31 | This also includes a utility method to support running and verifying snakemake in `tests/util.py` 32 | - supports multiple sub-commands in `tools/__main__.py` with some nice logging when a tool fails 33 | - a little hello world tool in `tools/hello_world.py` 34 | 35 | ## Modifying this repo for a new client 36 | 37 | This repo is a skeleton for Snakemake pipelines and a Python toolkit.
38 | 39 | - [ ] Modify `setup.py` 40 | - [ ] update `conda-requirements-minimal.txt` with minimal requirements for the `client-tools` toolkit 41 | - [ ] update `conda-requirements-test.txt` with minimal requirements for the `client-tools` unit testing 42 | - [ ] update `pip-requirements.txt` with minimal requirements for the `client-tools` (prefer conda) 43 | - [ ] update `src/python/pyclient` source code (search for terms: `PYCLIENT`, `pyclient`, `client-tools`) 44 | 45 | ## Install client-tools 46 | 47 | - [Install conda][conda-link] 48 | 49 | 50 | - Create the `pyclient` conda environment 51 | 52 | 53 | ```console 54 | mamba create -n pyclient \ 55 | --override-channels -y \ 56 | -c bioconda -c conda-forge \ 57 | --file conda-requirements-minimal.txt \ 58 | --file conda-requirements-test.txt 59 | ``` 60 | 61 | - Activate the `pyclient` conda environment 62 | 63 | ```bash 64 | conda activate pyclient 65 | ``` 66 | 67 | - Install all non-conda dependencies via pip 68 | 69 | ```bash 70 | pip install -r pip-requirements.txt 71 | ``` 72 | 73 | - Install `pyclient` (in developer mode) 74 | 75 | ```bash 76 | python setup.py develop 77 | ``` 78 | 79 | - Validate the install via the help message 80 | 81 | ```bash 82 | client-tools -h 83 | ``` 84 | 85 | - Validate the snakemake install 86 | 87 | ```bash 88 | snakemake --snakefile src/snakemake/hello_world.smk -j 1 89 | ``` 90 | 91 | [fulcrum-genomics-link]: https://www.fulcrumgenomics.com 92 | [conda-link]: https://docs.conda.io/projects/conda/en/latest/user-guide/install/ 93 | 94 | -------------------------------------------------------------------------------- /ci/flake8.cfg: -------------------------------------------------------------------------------- 1 | # flake8 config file for pyclient 2 | 3 | [flake8] 4 | max_line_length = 99 5 | show-source = true 6 | ignore = E701 W504 W503 E203 7 | -------------------------------------------------------------------------------- /ci/mypy.ini:
-------------------------------------------------------------------------------- 1 | [mypy] 2 | strict_optional = False 3 | ignore_missing_imports = True 4 | disallow_untyped_decorators = False 5 | follow_imports = "silent" 6 | disallow_untyped_defs = True 7 | -------------------------------------------------------------------------------- /ci/precommit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ############################################################################### 4 | # Script that should be run pre-commit after making any changes to the pyclient 5 | # package / subdirectory. 6 | # 7 | # Runs: 8 | # Unit tests 9 | # Linting 10 | # Type checking 11 | ############################################################################### 12 | 13 | set -e 14 | 15 | failures="" 16 | 17 | function banner() { 18 | echo 19 | echo "================================================================================" 20 | echo "$*" 21 | echo "================================================================================" 22 | echo 23 | } 24 | 25 | ##################################################################### 26 | # Takes two parameters, a "name" and a "command". 27 | # Runs the command and prints out whether it succeeded or failed, and 28 | # also tracks a list of failed steps in $failures. 29 | ##################################################################### 30 | function run() { 31 | local name=$1 32 | local cmd=$2 33 | 34 | banner "Running $name [$cmd]" 35 | set +e 36 | $cmd 37 | exit_code=$? 
38 | set -e 39 | 40 | if [[ $exit_code == 0 ]]; then 41 | echo Passed $name 42 | else 43 | echo Failed $name [$cmd] 44 | if [ -z "$failures" ]; then 45 | failures="$failures $name" 46 | else 47 | failures="$failures, $name" 48 | fi 49 | fi 50 | } 51 | 52 | parent=$(cd "$(dirname $0)" && pwd -P) 53 | root=$(dirname ${parent})/src/python 54 | r_root=$(dirname ${parent} | xargs dirname)/R 55 | 56 | if [[ -z ${CONDA_DEFAULT_ENV} ]]; then 57 | banner "Conda not active. pyclient conda environment must be active." 58 | exit 1 59 | fi 60 | 61 | pushd $root > /dev/null 62 | banner "Executing in conda environment ${CONDA_DEFAULT_ENV} in directory ${root}" 63 | run "Unit Tests" "pytest -vv -r sx pyclient" 64 | run "Style Checking" "black --line-length 99 --check pyclient" 65 | run "Linting" "flake8 --config=$parent/flake8.cfg pyclient" 66 | run "Type Checking" "mypy -p pyclient --config $parent/mypy.ini" 67 | popd > /dev/null 68 | 69 | if [ -z "$failures" ]; then 70 | banner "Precommit Passed" 71 | else 72 | banner "Precommit Failed with failures in: $failures" 73 | exit 1 74 | fi 75 | 76 | -------------------------------------------------------------------------------- /conda-requirements-minimal.txt: -------------------------------------------------------------------------------- 1 | # Use the following channels when building a conda environment: 2 | # --override-channels -c bioconda -c conda-forge 3 | 4 | # Packages required to run Snakemake 5 | python=3.6.10 6 | snakemake-minimal=5.11.1 7 | 8 | # packages used by the pyclient module 9 | defopt=6.0.2 10 | attrs=19.3.0 11 | samwell==0.0.2 12 | typing_extensions=4.1.1 13 | 14 | -------------------------------------------------------------------------------- /conda-requirements-test.txt: -------------------------------------------------------------------------------- 1 | # testing 2 | pytest==5.3.5 3 | flake8==3.7.9 4 | mypy==0.761 5 | black==19.10b0 6 | 
-------------------------------------------------------------------------------- /pip-requirements.txt: -------------------------------------------------------------------------------- 1 | # All packages get installed in order. 2 | distutils-strtobool==0.1.0 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import sys 3 | 4 | if sys.version_info < (3, 6): 5 | sys.exit('Sorry, Python < 3.6 is not supported') 6 | 7 | # todo: requirements 8 | setup( 9 | name='pyclient', 10 | version='0.1', 11 | author='Fulcrum Genomics', 12 | author_email='no-reply@fulcrumgenomics.com', 13 | maintainer='Fulcrum Genomics', 14 | maintainer_email='no-reply@fulcrumgenomics.com', 15 | description='Python/Snakemake Skeleton', 16 | url='https://github.com/fulcrumgenomics/python-snakemake-skeleton', 17 | packages=['pyclient'], 18 | package_dir={'': 'src/python'}, 19 | entry_points={ 20 | 'console_scripts': ['client-tools=pyclient.tools.__main__:main'] 21 | }, 22 | include_package_data=True, 23 | zip_safe=False, 24 | classifiers=[ 25 | 'Environment :: Console', 26 | "Programming Language :: Python :: 3", 27 | "Development Status :: 3 - Alpha", 28 | "Intended Audience :: Developers", 29 | ] 30 | ) 31 | -------------------------------------------------------------------------------- /src/python/pyclient/__init__.py: -------------------------------------------------------------------------------- 1 | from pyclient.core import logging 2 | 3 | logging.setup_logging() 4 | -------------------------------------------------------------------------------- /src/python/pyclient/core/logging.py: -------------------------------------------------------------------------------- 1 | """ 2 | Methods for setting up logging for tools. 
3 | ----------------------------------------- 4 | """ 5 | 6 | import logging 7 | import socket 8 | from threading import RLock 9 | 10 | 11 | # Global that is set to True once logging initialization is run to prevent running > once. 12 | __PYCLIENT_LOGGING_SETUP: bool = False 13 | 14 | # A lock used to make sure initialization is performed only once 15 | __LOCK = RLock() 16 | 17 | 18 | def setup_logging(level: str = "INFO") -> None: 19 | """Globally configure logging for all modules under pyclient. 20 | 21 | Configures logging to run at a specific level and output messages to stderr with 22 | useful information preceding the actual log message. 23 | """ 24 | global __PYCLIENT_LOGGING_SETUP 25 | 26 | with __LOCK: 27 | if not __PYCLIENT_LOGGING_SETUP: 28 | format = ( 29 | f"%(asctime)s {socket.gethostname()} %(name)s:%(funcName)s:%(lineno)s " 30 | + "[%(levelname)s]: %(message)s" 31 | ) 32 | handler = logging.StreamHandler() 33 | handler.setLevel(level) 34 | handler.setFormatter(logging.Formatter(format)) 35 | 36 | logger = logging.getLogger("pyclient") 37 | logger.setLevel(level) 38 | logger.addHandler(handler) 39 | else: 40 | logging.getLogger(__name__).warn("Logging already initialized.") 41 | 42 | __PYCLIENT_LOGGING_SETUP = True 43 | -------------------------------------------------------------------------------- /src/python/pyclient/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fulcrumgenomics/python-snakemake-template/68424fbd0dc135e2b2e3fedeceed2b13104d2a16/src/python/pyclient/pipeline/__init__.py -------------------------------------------------------------------------------- /src/python/pyclient/pipeline/snakemake_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for working with snakemake. 
3 | --------------------------------------------- 4 | 5 | This module contains utility functions for interacting with and working with snakemake. 6 | Currently this includes functions to parse key information out of the snakemake log 7 | file and summarize any failures. 8 | """ 9 | 10 | import enum 11 | import logging 12 | from itertools import dropwhile 13 | from pathlib import Path 14 | from typing import Any 15 | from typing import ClassVar 16 | from typing import List 17 | from typing import Optional 18 | 19 | import attr 20 | 21 | # The default number of lines to return from the log files for each failed job 22 | __LINES_PER_LOGFILE: int = 50 23 | 24 | 25 | def last_lines(path: Path, n: Optional[int] = __LINES_PER_LOGFILE) -> List[str]: 26 | """Returns the last N lines from a file as a List. 27 | 28 | Args: 29 | path: the path to the file (must exist) 30 | n: the number of line to return, None will return all lines 31 | Return: 32 | the last n lines of the file as a list, or the whole file < n lines. 33 | """ 34 | try: 35 | lines = read_lines(path) 36 | if n is not None and len(lines) > n: 37 | lines = lines[-n : len(lines)] 38 | return lines 39 | except Exception: 40 | return [f">>> Could not open log file for reading: {path}. <<<"] 41 | 42 | 43 | def read_lines(path: Path) -> List[str]: 44 | """ 45 | Reads a file and returns it as a list of lines with newlines stripped. 46 | 47 | Args: 48 | path: the path of the file to read 49 | Return: 50 | the list of lines from the file 51 | """ 52 | with path.open("r") as fh: 53 | lines: List[str] = fh.readlines() 54 | return [l.rstrip() for l in lines] 55 | 56 | 57 | def write_lines(path: Path, lines: List[str]) -> None: 58 | """ 59 | Writes a list of lines to a file with newlines between the lines. 
60 | 61 | Args: 62 | path: the path to write to 63 | lines: the list of lines to write 64 | """ 65 | with path.open("w") as out: 66 | for line in lines: 67 | out.write(line) 68 | out.write("\n") 69 | 70 | 71 | @attr.s 72 | class RuleLog: 73 | """Stores the path and name for the log file for a rule. 74 | 75 | Attributes: 76 | path: the path to the log file for the rule 77 | name: the name of the rule 78 | """ 79 | 80 | path: Path = attr.ib() 81 | name: str = attr.ib() 82 | 83 | RULE_ERROR_PREFIX: ClassVar[str] = "Error in rule " 84 | LOG_PREFIX: ClassVar[str] = " log: " 85 | LOG_SUFFIX: ClassVar[str] = " (check log file(s) for error message)" 86 | 87 | @classmethod 88 | def get_logs(cls, snakemake_log: Path) -> List["RuleLog"]: 89 | """Gets the logs for the rules from a Snakemake log file.""" 90 | with snakemake_log.open("r") as fh: 91 | lines: List[str] = list(fh.readlines()) 92 | 93 | logs: List[RuleLog] = [] 94 | while lines: 95 | lines = list(dropwhile(lambda l: not l.startswith(cls.RULE_ERROR_PREFIX), iter(lines))) 96 | if lines: 97 | rule_name: str = lines[0].rstrip()[len(cls.RULE_ERROR_PREFIX) : -1] 98 | lines = list(dropwhile(lambda l: not l.startswith(cls.LOG_PREFIX), iter(lines))) 99 | dir: Path = Path(".").absolute() 100 | log_path = dir / lines[0].rstrip()[len(cls.LOG_PREFIX) : -len(cls.LOG_SUFFIX)] 101 | lines = lines[1:] 102 | logs.append(RuleLog(path=log_path, name=rule_name)) 103 | 104 | return logs 105 | 106 | 107 | def summarize_snakemake_errors( 108 | path: Path, lines_per_log: Optional[int] = __LINES_PER_LOGFILE 109 | ) -> List[str]: 110 | """Summarizes any errors that occurred during a run of a pipeline. Uses the snakemake log 111 | to find all failed rule invocations and their log files. Produces a list of lines containing 112 | summary information per failed rule invocation and the last 50 lines of each log file. 
113 | 114 | Notes: 115 | * fails if rule has more than one log file defined 116 | * fails if rule has no log file defined 117 | 118 | Args: 119 | path: the path to the main snakemake log file 120 | lines_per_log: the number of lines to pull from each log file, None to return all lines 121 | Returns: 122 | a list of lines containing summary information on all failed rule invocations 123 | """ 124 | summary = [] 125 | 126 | logs: List[RuleLog] = RuleLog.get_logs(snakemake_log=path) 127 | 128 | for log in logs: 129 | summary.append(f"========== Start of Error Info for {log.name} ==========") 130 | summary.append(f"Failed rule: {log.name}") 131 | summary.append(f"Last {lines_per_log} lines of log file: {log.path}") 132 | for line in last_lines(log.path, lines_per_log): 133 | summary.append(f" {line}") 134 | summary.append(f"=========== End of Error Info for {log.name} ===========") 135 | 136 | return summary 137 | 138 | 139 | def on_error( 140 | snakefile: Path, 141 | config: Optional[Any], 142 | log: Path, 143 | lines_per_log: Optional[int] = __LINES_PER_LOGFILE, 144 | ) -> None: 145 | """Block of code that gets called if the snakemake pipeline exits with an error. 146 | 147 | The `log` variable contains a path to the snakemake log file which can be parsed for 148 | more information. Summarizes information on failed jobs and writes it to the output 149 | and also to an error summary file in the working directory. 
150 | 151 | Args: 152 | snakefile: the path to the snakefile 153 | config: the configuration for the pipeline 154 | log: the path to the snakemake log file 155 | lines_per_log: the number of lines to pull from each log file, None to return all lines 156 | """ 157 | try: 158 | # Build the preface 159 | preface: List[str] = [ 160 | "Error in snakemake pipeline.", 161 | f"working_dir = {Path('.').absolute()}", 162 | ] 163 | # print the config attributes 164 | if config is not None: 165 | try: 166 | for attribute in attr.fields(type(config)): 167 | value = getattr(config, attribute.name) 168 | if isinstance(value, enum.Enum): 169 | value = value.value 170 | else: 171 | value = str(value) 172 | preface.append(f"{attribute.name} = {value}") 173 | except Exception: 174 | try: 175 | for key, value in config.items(): 176 | preface.append(f"{key} = {value}") 177 | except Exception: 178 | preface.append(f"config = {config}") 179 | preface.append("Detailed error information follows.") 180 | 181 | summary = preface + summarize_snakemake_errors(log, lines_per_log=lines_per_log) 182 | text = "\n".join(summary) 183 | pipeline_name = snakefile.with_suffix("").name 184 | logging.getLogger(pipeline_name).error(text) 185 | with Path("./error_summary.txt").open("w") as out: 186 | out.write(text) 187 | except Exception as ex: 188 | print("###########################################################################") 189 | print("Exception raised in Snakemake onerror handler.") 190 | print(str(ex)) 191 | print("###########################################################################") 192 | -------------------------------------------------------------------------------- /src/python/pyclient/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fulcrumgenomics/python-snakemake-template/68424fbd0dc135e2b2e3fedeceed2b13104d2a16/src/python/pyclient/tests/__init__.py 
-------------------------------------------------------------------------------- /src/python/pyclient/tests/test_hello_world.py: -------------------------------------------------------------------------------- 1 | """Tests for the hello-world pipeline""" 2 | 3 | 4 | from typing import Dict 5 | 6 | from py._path.local import LocalPath as TmpDir 7 | from pyclient.tests.util import run_snakemake 8 | 9 | 10 | def test_hello_world(tmpdir: TmpDir) -> None: 11 | """Basic unit test that runs the snakefile in dry-run mode to ensure it 12 | parses correctly. 13 | """ 14 | 15 | rules: Dict[str, int] = { 16 | "all": 1, 17 | "hello_world": 1, 18 | } 19 | 20 | run_snakemake(pipeline="hello-world", workdir=tmpdir, rules=rules) 21 | -------------------------------------------------------------------------------- /src/python/pyclient/tests/util.py: -------------------------------------------------------------------------------- 1 | """Tests for pipelines within :module:`~pyclient` 2 | 3 | The tests briefly test the Snakefiles to ensure they are runnable and generally execute the 4 | expected rules. They are far from comprehensive, as they do not verify the analytical results 5 | of each pipeline, which should be done elsewhere. 
6 | """ 7 | 8 | 9 | from collections import defaultdict 10 | from pathlib import Path 11 | from typing import Any 12 | from typing import Callable 13 | from typing import Dict 14 | from typing import List 15 | from typing import Optional 16 | 17 | import snakemake 18 | 19 | 20 | class SnakemakeLogger(object): 21 | """Returns a log handler for snakemake and tracks if the rules that were run""" 22 | 23 | def __init__(self) -> None: 24 | self.rule_count: Dict[str, int] = defaultdict(lambda: 0) 25 | 26 | def log_handler(self) -> Callable[[Dict[str, Any]], None]: 27 | """Returns a log handler for use with snakemake.""" 28 | 29 | def fn(d: Dict[str, Any]) -> None: 30 | if d["level"] != "run_info": 31 | return 32 | # NB: skip the first two and last lines 33 | for counts_line in d["msg"].split("\n")[2:-1]: 34 | counts_line = counts_line.strip() 35 | count, job = counts_line.split("\t") 36 | assert int(count) > 0, counts_line 37 | 38 | self.rule_count[job] += int(count) 39 | 40 | return fn 41 | 42 | 43 | def run_snakemake( 44 | pipeline: str, 45 | workdir: Path, 46 | rules: Dict[str, int], 47 | config: Optional[Dict[str, Any]] = None, 48 | configfiles: Optional[List[Path]] = None, 49 | quiet: bool = True, 50 | ) -> SnakemakeLogger: 51 | """Runs Snakemake. 
52 | 53 | Args: 54 | snakefile: the snake file to execute 55 | workdir: the working directory in which to run Snakemake 56 | rules: a mapping of rule name to expect # of times it should run 57 | config: the optional configuration object for Snakemake 58 | configfiles: the optional list of configuration files for Snakemake 59 | quiet: tells snakemake to not output logging, set to true for debugging failing pipelines 60 | """ 61 | filename = pipeline.replace("-", "_") + ".smk" 62 | src_dir: Path = Path(__file__).absolute().parent.parent.parent.parent 63 | snakefile: Path = src_dir / "snakemake" / filename 64 | assert snakefile.is_file(), f"{snakefile} is not a file" 65 | 66 | # run it 67 | logger = SnakemakeLogger() 68 | assert snakemake.snakemake( 69 | snakefile=str(snakefile), 70 | config=config, 71 | configfiles=configfiles, 72 | resources={"mem_gb": 8}, 73 | workdir=str(workdir), 74 | dryrun=True, 75 | quiet=quiet, 76 | log_handler=[logger.log_handler()], 77 | ignore_ambiguity=True, 78 | ) 79 | 80 | # check the "all" rule 81 | assert ( 82 | logger.rule_count["all"] == 1 83 | ), f"All rule was not run once, found: {logger.rule_count['all']}" 84 | 85 | # check that the executed rules were run the correct # of times 86 | for rule, count in logger.rule_count.items(): 87 | assert rule in rules, f"Could not find {rule} in {rules}" 88 | assert count == rules[rule], f"{rule}: {rules[rule]}" 89 | 90 | # check that all the expected rules were run 91 | for rule in rules: 92 | assert rule in logger.rule_count 93 | 94 | return logger 95 | -------------------------------------------------------------------------------- /src/python/pyclient/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fulcrumgenomics/python-snakemake-template/68424fbd0dc135e2b2e3fedeceed2b13104d2a16/src/python/pyclient/tools/__init__.py -------------------------------------------------------------------------------- 
/src/python/pyclient/tools/__main__.py: -------------------------------------------------------------------------------- 1 | """Main entry point for all pyclient tools.""" 2 | 3 | import logging 4 | import sys 5 | from typing import Callable 6 | from typing import List 7 | 8 | import defopt 9 | 10 | from pyclient.tools.hello_world import hello_world 11 | 12 | TOOLS: List[Callable] = sorted( 13 | [hello_world], key=lambda f: f.__name__, 14 | ) 15 | 16 | 17 | def main(argv: List[str] = sys.argv[1:]) -> None: 18 | logger = logging.getLogger(__name__) 19 | if len(argv) != 0 and all(arg not in argv for arg in ["-h", "--help"]): 20 | logger.info("Running command: client-tools " + " ".join(argv)) 21 | try: 22 | defopt.run(funcs=TOOLS, argv=argv) 23 | logger.info("Completed successfully.") 24 | except Exception as e: 25 | logger.info("Failed on command: " + " ".join(argv)) 26 | raise e 27 | -------------------------------------------------------------------------------- /src/python/pyclient/tools/hello_world.py: -------------------------------------------------------------------------------- 1 | def hello_world(*, message: str = "Hello World!") -> None: 2 | print(message) 3 | -------------------------------------------------------------------------------- /src/scripts/common.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Simplified logging function 4 | function log() { 5 | >&2 printf "[%s] %b\n" "$(date +"%F %T %Z")" "$*" 6 | } 7 | 8 | # Determine how many cores to use by default 9 | function find_core_limit() { 10 | if hash cgget 2>/dev/null; then 11 | cores=$(cgget -n --values-only --variable cpu.shares / | awk '{print int($1 / 1024.0)}') 12 | elif [[ -a /proc/cpuinfo ]]; then 13 | cores=$(grep -c ^processor /proc/cpuinfo) 14 | else 15 | cores=$(sysctl -n hw.ncpu) 16 | fi 17 | if [[ "$cores" -le "0" ]]; then 18 | log "Cores must be > 0: $cores" 19 | exit 1 20 | fi 21 | echo "$cores" 22 | } 23 | 24 | 
# Attempt to retrieve the amount of memory available for the pipeline. If running in a 25 | # docker container, then try to use `cgroups` to get the memory limit. This fails if `--memory` 26 | # is not set when running the docker container, so fall-back on the system resources otherwise. 27 | # AWS should always be setting `--memory`, so this is only an issue when running manually. 28 | function find_mem_limit_gb() { 29 | declare -a mem_limits 30 | if hash cgget 2>/dev/null; then 31 | # Source: https://stackoverflow.com/questions/42187085/check-mem-limit-within-a-docker-container 32 | # NB: convert from bytes to gigabytes 33 | # NB: Will correctly find the limit when set by `--memory` in a container. All other times returns a very large 34 | mem_limits=("${mem_limits[@]}" "$(cgget -n --values-only --variable memory.limit_in_bytes / | awk '{printf "%20.0f\n", int($1 / 1073741824.0)}')") 35 | fi 36 | if [[ -a /proc/meminfo ]]; then 37 | # NB: ignores `--memory` flag passed to a docker container 38 | # NB: convert from kilobytes to gigabytes 39 | mem_limits=("${mem_limits[@]}" "$(grep ^MemTotal /proc/meminfo | awk '{print int($2 / 1000000.0)}')") 40 | fi 41 | if hash sysctl 2>/dev/null && [[ "$OSTYPE" == "darwin"* ]]; then 42 | # NB: convert from bytes to gigabytes 43 | mem_limits=("${mem_limits[@]}" "$(sysctl -n hw.memsize | awk '{print int($1 / 1000000000.0)}')") 44 | fi 45 | 46 | # Check that we have at least one value 47 | if [[ "${#mem_limits[@]}" -le 0 ]]; then 48 | log "Could not determine available RAM." 
49 | exit 1 50 | fi 51 | 52 | # Find min value 53 | mem_gb="${mem_limits[0]}" 54 | for lim in "${mem_limits[@]}"; do 55 | (( lim < mem_gb )) && mem_gb=$lim 56 | done 57 | 58 | if [[ "$mem_gb" -le "0" ]]; then 59 | log "Memory must be > 0: $mem_gb" 60 | exit 1 61 | fi 62 | echo "$mem_gb" 63 | } 64 | -------------------------------------------------------------------------------- /src/scripts/run_snakemake.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | usage() { 4 | local err=${1:-""}; 5 | cat <&2; 17 | echo -e "\n$err" >&2; 18 | exit 1; 19 | } 20 | 21 | dry_run="" 22 | 23 | while getopts "s:o:c:n" flag; do 24 | case "${flag}" in 25 | s) snakefile=${OPTARG};; 26 | o) out_dir=${OPTARG};; 27 | c) config_file=${OPTARG};; 28 | n) dry_run="-n";; 29 | *) usage;; 30 | esac 31 | done 32 | shift $((OPTIND-1)) 33 | 34 | extra_args="" 35 | if [ -z "${snakefile}" ]; then 36 | usage "Missing required parameter -s"; 37 | fi 38 | if [ -z "${out_dir}" ]; then 39 | usage "Missing required parameter -o"; 40 | fi 41 | if [ ! -z "${config_file}" ]; then 42 | extra_args="--configfile $config_file"; 43 | fi 44 | 45 | 46 | source $(dirname $0)/common.sh 47 | cores=$(find_core_limit) 48 | mem_gb=$(find_mem_limit_gb) 49 | log "Number of cores: $cores" 50 | log "Memory limit: $mem_gb GB" 51 | 52 | # Run Snakemake pipeline 53 | set -euo pipefail 54 | snakemake \ 55 | --printshellcmds \ 56 | --reason \ 57 | --nocolor \ 58 | --keep-going \ 59 | --rerun-incomplete \ 60 | --jobs "$cores" \ 61 | --resources "mem_gb=$mem_gb" \ 62 | --snakefile $snakefile \ 63 | --directory $out_dir \ 64 | $dry_run \ 65 | $extra_args; 66 | 67 | 68 | log "All done!" 
69 | -------------------------------------------------------------------------------- /src/snakemake/hello_world.smk: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Hello World pipeline 3 | ################################################################################ 4 | 5 | from pathlib import Path 6 | from typing import List 7 | 8 | from pyclient.pipeline import snakemake_utils 9 | 10 | 11 | ################################################################################ 12 | # Utility methods and variables 13 | ################################################################################ 14 | 15 | # TODO 16 | 17 | ################################################################################ 18 | # Terminal files 19 | ################################################################################ 20 | 21 | all_terminal_files: List[Path] = [Path("message.txt")] 22 | 23 | ################################################################################ 24 | # Snakemake rules 25 | ################################################################################ 26 | 27 | onerror: 28 | """Block of code that gets called if the snakemake pipeline exits with an error.""" 29 | snakemake_utils.on_error(snakefile=Path(__file__), config=_config, log=Path(log)) 30 | 31 | 32 | rule all: 33 | input: 34 | all_terminal_files 35 | 36 | rule hello_world: 37 | output: 38 | txt = "message.txt" 39 | log: 40 | "logs/hello_world.log" 41 | benchmark: 42 | "benchmarks/hello_world.txt" 43 | shell: 44 | "(echo Hello World > {output.txt}) &> {log}" 45 | --------------------------------------------------------------------------------