├── .github
├── CODEOWNERS
├── logos
│ └── fulcrumgenomics.svg
└── workflows
│ └── pythonpackage.yml
├── .gitignore
├── LICENSE
├── README.md
├── ci
├── flake8.cfg
├── mypy.ini
└── precommit.sh
├── conda-requirements-minimal.txt
├── conda-requirements-test.txt
├── pip-requirements.txt
├── setup.py
└── src
├── python
└── pyclient
│ ├── __init__.py
│ ├── core
│ └── logging.py
│ ├── pipeline
│ ├── __init__.py
│ └── snakemake_utils.py
│ ├── tests
│ ├── __init__.py
│ ├── test_hello_world.py
│ └── util.py
│ └── tools
│ ├── __init__.py
│ ├── __main__.py
│ └── hello_world.py
├── scripts
├── common.sh
└── run_snakemake.sh
└── snakemake
└── hello_world.smk
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @clintval @tfenne @nh13
2 |
--------------------------------------------------------------------------------
/.github/logos/fulcrumgenomics.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.github/workflows/pythonpackage.yml:
--------------------------------------------------------------------------------
1 | name: build
2 |
3 | on: [push]
4 | env:
5 | PYTHON_VERSION: 3.9
6 |
7 | jobs:
8 | testing:
9 | runs-on: ubuntu-24.04
10 | steps:
11 | - name: Checkout
12 | uses: actions/checkout@v2
13 | - name: Set up Python ${{env.PYTHON_VERSION}}
14 | uses: actions/setup-python@v1
15 | with:
16 | python-version: ${{env.PYTHON_VERSION}}
17 | - name: Set up miniconda
18 | uses: conda-incubator/setup-miniconda@v3
19 | with:
20 | miniforge-variant: Miniforge3
21 | miniforge-version: latest
22 | channels: conda-forge
23 | channel-priority: true
24 | auto-update-conda: true
25 | auto-activate-base: true
26 | python-version: ${{env.PYTHON_VERSION}}
27 | - name: Set up the conda environment
28 | shell: bash -l {0}
29 | run: |
30 | mamba create --override-channels -c bioconda -c conda-forge -n pyclient --file conda-requirements-minimal.txt --file conda-requirements-test.txt
31 | - name: Install pip-requirements
32 | shell: bash -l {0}
33 | run: |
34 | conda activate pyclient
35 | python -m pip install --upgrade pip
36 | pip install -r pip-requirements.txt
37 | - name: Run pre-commit testing
38 | shell: bash -l {0}
39 | run: |
40 | conda activate pyclient
41 | bash ci/precommit.sh
42 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # PyCharm
2 | .idea
3 |
4 | # Python compiled & optimized files
5 | *.pyc
6 | *.pyo
7 |
8 | # MyPy Cache directory
9 | .mypy_cache
10 |
11 | # for develop installs
12 | *.egg-info
13 |
14 | # local dirs
15 | envs
16 | environments
17 | .conda
18 | conda-env-builder
19 |
20 | # snakemake
21 | .snakemake
22 |
23 | # VSCode dir
24 | .vscode
25 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 |
3 | Copyright (c) 2022 Fulcrum Genomics LLC
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | [Visit us at Fulcrum Genomics](https://www.fulcrumgenomics.com) to learn more about how we can power your Bioinformatics with python-snakemake-template and beyond.
6 |
7 |
8 |
9 |
10 | [Build status](https://github.com/fulcrumgenomics/python-snakemake-skeleton/actions/workflows/pythonpackage.yml)
11 | [License](https://github.com/fulcrumgenomics/fgbio/blob/main/LICENSE)
12 | [Python 3.6.10](https://www.python.org/downloads/release/python-3610/)
13 |
14 | A skeleton repository for Snakemake pipeline(s) and a python command-line toolkit.
15 |
16 | ## Why this repo?
17 |
18 | This is the starting point for [Fulcrum Genomics][fulcrum-genomics-link] projects that contain Snakemake pipelines
19 | and a python toolkit.
20 |
21 | This repo contains the following, in no particular order:
22 |
23 | - a hello world snakefile in `src/snakemake/hello_world.smk`
24 | - this uses the `onerror` directive to better display rule errors, in particular the last few
25 | lines of the rule's log file
26 | - a python toolkit (`client-tools`) in `src/python/pyclient`
27 | - uses `defopt` for arg parsing
28 | - has custom logging in `core/logging.py`
29 | - has utility methods to support the above `onerror` snakemake directive in `pipeline/snakemake_utils.py`
30 | - has a unit test to ensure the above snakefile is runnable and generally executes the expected rules in `tests/test_hello_world.py`.
31 | This also includes a utility method to support running and verifying snakemake in `tests/util.py`
32 | - supports multiple sub-commands in `tools/__main__.py` with some nice logging when a tool fails
33 | - a little hello world tool in `tools/hello_world.py`
34 |
35 | ## Modifying this repo for a new client
36 |
37 | This repo is a skeleton for Snakemake pipelines and a Python toolkit.
38 |
39 | - [ ] Modify `setup.py`
40 | - [ ] update `conda-requirements-minimal.txt` with minimal requirements for the `client-tools` toolkit
41 | - [ ] update `conda-requirements-test.txt` with minimal requirements for the `client-tools` unit testing
42 | - [ ] update `pip-requirements.txt` with minimal requirements for the `client-tools` (prefer conda)
43 | - [ ] update `src/python/pyclient` source code (search for terms: `PYCLIENT`, `pyclient`, `client-tools`)
44 |
45 | ## Install client-tools
46 |
47 | - [Install conda][conda-link]
48 |
49 |
50 | - Create the `pyclient` conda environment
51 |
52 |
53 | ```console
54 | mamba create -n pyclient \
55 | --override-channels -y \
56 | -c bioconda -c conda-forge \
57 | --file conda-requirements-minimal.txt \
58 | --file conda-requirements-test.txt
59 | ```
60 |
61 | - Activate the `pyclient` conda environment
62 |
63 | ```bash
64 | conda activate pyclient
65 | ```
66 |
67 | - Install all non-conda dependencies via pip
68 |
69 | ```bash
70 | pip install -r pip-requirements.txt
71 | ```
72 |
73 | - Install `pyclient` (in developer mode)
74 |
75 | ```bash
76 | python setup.py develop
77 | ```
78 |
79 | - Validate the install via the help message
80 |
81 | ```bash
82 | client-tools -h
83 | ```
84 |
85 | - Validate the snakemake install
86 |
87 | ```bash
88 | snakemake --snakefile src/snakemake/hello_world.smk -j 1
89 | ```
90 |
91 | [fulcrum-genomics-link]: https://www.fulcrumgenomics.com
92 | [conda-link]: https://docs.conda.io/projects/conda/en/latest/user-guide/install/
93 |
94 |
--------------------------------------------------------------------------------
/ci/flake8.cfg:
--------------------------------------------------------------------------------
1 | # flake8 config file for pyclient
2 |
3 | [flake8]
4 | max_line_length = 99
5 | show-source = true
6 | ignore = E701 W504 W503 E203
7 |
--------------------------------------------------------------------------------
/ci/mypy.ini:
--------------------------------------------------------------------------------
# mypy configuration used by ci/precommit.sh (passed via --config).
[mypy]
strict_optional = False
ignore_missing_imports = True
disallow_untyped_decorators = False
# NB: INI values are taken literally, so the value must not be wrapped in quotes.
follow_imports = silent
disallow_untyped_defs = True
7 |
--------------------------------------------------------------------------------
/ci/precommit.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | ###############################################################################
4 | # Script that should be run pre-commit after making any changes to the pyclient
5 | # package / subdirectory.
6 | #
7 | # Runs:
8 | # Unit tests
9 | # Linting
10 | # Type checking
11 | ###############################################################################
12 |
13 | set -e
14 |
15 | failures=""
16 |
# Prints the given message framed above and below by a horizontal rule, with a
# blank line before and after the frame.
function banner() {
    local rule="================================================================================"

    echo
    echo "$rule"
    echo "$*"
    echo "$rule"
    echo
}
24 |
25 | #####################################################################
26 | # Takes two parameters, a "name" and a "command".
27 | # Runs the command and prints out whether it succeeded or failed, and
28 | # also tracks a list of failed steps in $failures.
29 | #####################################################################
# Runs one named check and records whether it passed.
#
# Args:
#   $1: a human readable name for the step
#   $2: the full command line to run, as a single string
#
# Appends the step name to the global $failures (comma separated) when the
# command exits non-zero. Never aborts the script itself.
function run() {
    local name=$1
    local cmd=$2
    local exit_code

    banner "Running $name [$cmd]"
    # Temporarily disable errexit so a failing step is recorded rather than
    # terminating the whole script.
    set +e
    # NB: intentionally unquoted; $cmd holds a whole command line that must be
    # word-split into the program and its arguments.
    $cmd
    exit_code=$?
    set -e

    if [[ $exit_code == 0 ]]; then
        echo "Passed $name"
    else
        # Quoted so that "[...]" is never treated as a glob pattern.
        echo "Failed $name [$cmd]"
        if [ -z "$failures" ]; then
            failures="$name"
        else
            failures="$failures, $name"
        fi
    fi
}
51 |
# Resolve the directory containing this script (ci/) and the python source root.
parent=$(cd "$(dirname "$0")" && pwd -P)
root="$(dirname "${parent}")/src/python"

# The checks rely on tools installed in the conda environment, so refuse to run
# when no environment is active.
if [[ -z "${CONDA_DEFAULT_ENV:-}" ]]; then
    banner "Conda not active. pyclient conda environment must be active."
    exit 1
fi

pushd "$root" > /dev/null
banner "Executing in conda environment ${CONDA_DEFAULT_ENV} in directory ${root}"
# NB: each command string is word-split by run(), so paths embedded in it must
# not contain whitespace.
run "Unit Tests" "pytest -vv -r sx pyclient"
run "Style Checking" "black --line-length 99 --check pyclient"
run "Linting" "flake8 --config=$parent/flake8.cfg pyclient"
run "Type Checking" "mypy -p pyclient --config $parent/mypy.ini"
popd > /dev/null

# Report every failed step and exit non-zero if there was at least one.
if [ -z "$failures" ]; then
    banner "Precommit Passed"
else
    banner "Precommit Failed with failures in: $failures"
    exit 1
fi
75 |
76 |
--------------------------------------------------------------------------------
/conda-requirements-minimal.txt:
--------------------------------------------------------------------------------
1 | # Use the following channels when building a conda environment:
2 | # --override-channels -c bioconda -c conda-forge
3 |
4 | # Packages required to run Snakemake
5 | python=3.6.10
6 | snakemake-minimal=5.11.1
7 |
8 | # packages used by the pyclient module
9 | defopt=6.0.2
10 | attrs=19.3.0
11 | samwell==0.0.2
12 | typing_extensions=4.1.1
13 |
14 |
--------------------------------------------------------------------------------
/conda-requirements-test.txt:
--------------------------------------------------------------------------------
1 | # testing
2 | pytest==5.3.5
3 | flake8==3.7.9
4 | mypy==0.761
5 | black==19.10b0
6 |
--------------------------------------------------------------------------------
/pip-requirements.txt:
--------------------------------------------------------------------------------
1 | # All packages get installed in order.
2 | distutils-strtobool==0.1.0
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging script for the ``pyclient`` toolkit and its ``client-tools`` entry point."""
import sys

from setuptools import setup

if sys.version_info < (3, 6):
    sys.exit('Sorry, Python < 3.6 is not supported')

# todo: requirements
setup(
    name='pyclient',
    version='0.1',
    author='Fulcrum Genomics',
    author_email='no-reply@fulcrumgenomics.com',
    maintainer='Fulcrum Genomics',
    maintainer_email='no-reply@fulcrumgenomics.com',
    description='Python/Snakemake Skeleton',
    url='https://github.com/fulcrumgenomics/python-snakemake-skeleton',
    # NB: every sub-package must be listed, otherwise a regular (non-develop) install
    # only ships ``pyclient/__init__.py`` and the console script cannot be imported.
    packages=[
        'pyclient',
        'pyclient.core',
        'pyclient.pipeline',
        'pyclient.tests',
        'pyclient.tools',
    ],
    package_dir={'': 'src/python'},
    # Mirrors the interpreter check above so that installers can enforce it too.
    python_requires='>=3.6',
    entry_points={
        'console_scripts': ['client-tools=pyclient.tools.__main__:main']
    },
    include_package_data=True,
    zip_safe=False,
    classifiers=[
        'Environment :: Console',
        "Programming Language :: Python :: 3",
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
    ]
)
31 |
--------------------------------------------------------------------------------
/src/python/pyclient/__init__.py:
--------------------------------------------------------------------------------
from pyclient.core import logging

# Configure logging as a side effect of importing the package.
# NB: `logging` here is `pyclient.core.logging`, not the standard-library module.
logging.setup_logging()
4 |
--------------------------------------------------------------------------------
/src/python/pyclient/core/logging.py:
--------------------------------------------------------------------------------
1 | """
2 | Methods for setting up logging for tools.
3 | -----------------------------------------
4 | """
5 |
6 | import logging
7 | import socket
8 | from threading import RLock
9 |
10 |
# Global that is set to True once logging initialization is run to prevent running > once.
__PYCLIENT_LOGGING_SETUP: bool = False

# A lock used to make sure initialization is performed only once
__LOCK = RLock()


def setup_logging(level: str = "INFO") -> None:
    """Globally configure logging for all modules under pyclient.

    Configures logging to run at a specific level and output messages to stderr with
    useful information preceding the actual log message.

    Only the first call has any effect; later calls log a warning and leave the existing
    configuration (including its level) untouched.

    Args:
        level: the logging level to apply to the "pyclient" logger and its handler
    """
    global __PYCLIENT_LOGGING_SETUP

    with __LOCK:
        if __PYCLIENT_LOGGING_SETUP:
            # NB: `Logger.warn` is deprecated (and removed in Python 3.13); use `warning`.
            logging.getLogger(__name__).warning("Logging already initialized.")
            return

        log_format = (
            f"%(asctime)s {socket.gethostname()} %(name)s:%(funcName)s:%(lineno)s "
            + "[%(levelname)s]: %(message)s"
        )
        handler = logging.StreamHandler()
        handler.setLevel(level)
        handler.setFormatter(logging.Formatter(log_format))

        logger = logging.getLogger("pyclient")
        logger.setLevel(level)
        logger.addHandler(handler)

        # Only mark as initialized once the handler has been attached successfully.
        __PYCLIENT_LOGGING_SETUP = True
43 |
--------------------------------------------------------------------------------
/src/python/pyclient/pipeline/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fulcrumgenomics/python-snakemake-template/68424fbd0dc135e2b2e3fedeceed2b13104d2a16/src/python/pyclient/pipeline/__init__.py
--------------------------------------------------------------------------------
/src/python/pyclient/pipeline/snakemake_utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility functions for working with snakemake.
3 | ---------------------------------------------
4 |
5 | This module contains utility functions for interacting with and working with snakemake.
6 | Currently this includes functions to parse key information out of the snakemake log
7 | file and summarize any failures.
8 | """
9 |
10 | import enum
11 | import logging
12 | from itertools import dropwhile
13 | from pathlib import Path
14 | from typing import Any
15 | from typing import ClassVar
16 | from typing import List
17 | from typing import Optional
18 |
19 | import attr
20 |
# The default number of lines to return from the log files for each failed job
__LINES_PER_LOGFILE: int = 50


def last_lines(path: Path, n: Optional[int] = __LINES_PER_LOGFILE) -> List[str]:
    """Returns the last N lines from a file as a List.

    This is a best-effort helper used while reporting errors, so it never raises: if the
    file cannot be read, a single placeholder line describing the problem is returned.

    Args:
        path: the path to the file
        n: the number of lines to return; None will return all lines and a value of zero
           or less will return no lines
    Return:
        the last n lines of the file as a list, or the whole file if it has < n lines.
    """
    try:
        lines = read_lines(path)
        if n is None:
            return lines
        # NB: `lines[-0:]` is the whole list, so non-positive values are handled explicitly.
        return lines[-n:] if n > 0 else []
    except Exception:
        # Deliberately broad: a missing or unreadable log must not mask the original error.
        return [f">>> Could not open log file for reading: {path}. <<<"]


def read_lines(path: Path) -> List[str]:
    """
    Reads a file and returns it as a list of lines with trailing whitespace (including the
    newline) stripped.

    Args:
        path: the path of the file to read
    Return:
        the list of lines from the file
    """
    with path.open("r") as fh:
        return [line.rstrip() for line in fh]
55 |
56 |
def write_lines(path: Path, lines: List[str]) -> None:
    """
    Writes each of the given lines to a file, terminating every line with a newline.

    Args:
        path: the path to write to (an existing file is overwritten)
        lines: the list of lines to write
    """
    with path.open("w") as handle:
        handle.writelines(entry + "\n" for entry in lines)
69 |
70 |
@attr.s
class RuleLog:
    """Stores the path and name for the log file for a rule.

    Attributes:
        path: the path to the log file for the rule
        name: the name of the rule
    """

    path: Path = attr.ib()
    name: str = attr.ib()

    # The text that starts the line announcing a failed rule in the snakemake log
    RULE_ERROR_PREFIX: ClassVar[str] = "Error in rule "
    # The text that starts the line reporting the failed rule's log file
    LOG_PREFIX: ClassVar[str] = " log: "
    # The text that snakemake appends after the log file path
    LOG_SUFFIX: ClassVar[str] = " (check log file(s) for error message)"

    @classmethod
    def get_logs(cls, snakemake_log: Path) -> List["RuleLog"]:
        """Gets the logs for the rules from a Snakemake log file.

        Each line starting with `RULE_ERROR_PREFIX` names a failed rule, and the first
        following line starting with `LOG_PREFIX` gives that rule's log file. A failed rule
        for which no log line is found before the next failed rule (or the end of the file)
        is omitted from the result.

        Args:
            snakemake_log: the path to the main snakemake log file
        Return:
            one entry per failed rule that reported a log file, in order of appearance, with
            log paths resolved against the current working directory
        """
        with snakemake_log.open("r") as fh:
            lines: List[str] = fh.readlines()

        cwd: Path = Path(".").absolute()
        logs: List[RuleLog] = []
        pending_rule: Optional[str] = None

        for line in lines:
            if line.startswith(cls.RULE_ERROR_PREFIX):
                # Drop the prefix and the single trailing character that follows the rule name
                pending_rule = line.rstrip()[len(cls.RULE_ERROR_PREFIX) : -1]
            elif pending_rule is not None and line.startswith(cls.LOG_PREFIX):
                log_text = line.rstrip()[len(cls.LOG_PREFIX) :]
                # Only strip the suffix when present so that the path is never truncated
                if log_text.endswith(cls.LOG_SUFFIX):
                    log_text = log_text[: -len(cls.LOG_SUFFIX)]
                logs.append(RuleLog(path=cwd / log_text, name=pending_rule))
                pending_rule = None

        return logs
105 |
106 |
def summarize_snakemake_errors(
    path: Path, lines_per_log: Optional[int] = __LINES_PER_LOGFILE
) -> List[str]:
    """Summarizes any errors that occurred during a run of a pipeline.

    Uses the snakemake log to find the failed rule invocations and their log files, and
    produces, per failed rule invocation, a header, the tail of its log file, and a footer.

    Notes:
        * only failed rules that report a single log file in the snakemake log are supported

    Args:
        path: the path to the main snakemake log file
        lines_per_log: the number of lines to pull from each log file, None to return all lines
    Returns:
        a list of lines containing summary information on all failed rule invocations
    """
    report: List[str] = []

    for rule_log in RuleLog.get_logs(snakemake_log=path):
        report.extend(
            [
                f"========== Start of Error Info for {rule_log.name} ==========",
                f"Failed rule: {rule_log.name}",
                f"Last {lines_per_log} lines of log file: {rule_log.path}",
            ]
        )
        # Indent the log excerpt so it stands out from the summary lines
        report.extend(f" {line}" for line in last_lines(rule_log.path, lines_per_log))
        report.append(f"=========== End of Error Info for {rule_log.name} ===========")

    return report
137 |
138 |
def on_error(
    snakefile: Path,
    config: Optional[Any],
    log: Path,
    lines_per_log: Optional[int] = __LINES_PER_LOGFILE,
) -> None:
    """Block of code that gets called if the snakemake pipeline exits with an error.

    The `log` variable contains a path to the snakemake log file which can be parsed for
    more information. Summarizes information on failed jobs, logs it at the error level to a
    logger named after the snakefile (its file name without the final suffix), and also
    writes it to `error_summary.txt` in the current working directory.

    This function does not raise: any exception is caught and reported with `print`.

    Args:
        snakefile: the path to the snakefile
        config: the configuration for the pipeline; an attrs instance, an object with an
                `items()` method (e.g. a dict), or any other object (reported via its string
                form)
        log: the path to the snakemake log file
        lines_per_log: the number of lines to pull from each log file, None to return all lines
    """
    try:
        # Build the preface
        preface: List[str] = [
            "Error in snakemake pipeline.",
            f"working_dir = {Path('.').absolute()}",
        ]
        # print the config attributes
        if config is not None:
            try:
                # First, treat the config as an attrs class and list each of its fields
                for attribute in attr.fields(type(config)):
                    value = getattr(config, attribute.name)
                    if isinstance(value, enum.Enum):
                        value = value.value
                    else:
                        value = str(value)
                    preface.append(f"{attribute.name} = {value}")
            except Exception:
                try:
                    # Next, treat the config as a mapping
                    for key, value in config.items():
                        preface.append(f"{key} = {value}")
                except Exception:
                    # Finally, fall back to the string form of the whole object
                    preface.append(f"config = {config}")
        preface.append("Detailed error information follows.")

        summary = preface + summarize_snakemake_errors(log, lines_per_log=lines_per_log)
        text = "\n".join(summary)
        pipeline_name = snakefile.with_suffix("").name
        logging.getLogger(pipeline_name).error(text)
        with Path("./error_summary.txt").open("w") as out:
            out.write(text)
    except Exception as ex:
        # Never let a failure while reporting hide the pipeline's own error
        print("###########################################################################")
        print("Exception raised in Snakemake onerror handler.")
        print(str(ex))
        print("###########################################################################")
192 |
--------------------------------------------------------------------------------
/src/python/pyclient/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fulcrumgenomics/python-snakemake-template/68424fbd0dc135e2b2e3fedeceed2b13104d2a16/src/python/pyclient/tests/__init__.py
--------------------------------------------------------------------------------
/src/python/pyclient/tests/test_hello_world.py:
--------------------------------------------------------------------------------
1 | """Tests for the hello-world pipeline"""
2 |
3 |
4 | from typing import Dict
5 |
6 | from py._path.local import LocalPath as TmpDir
7 | from pyclient.tests.util import run_snakemake
8 |
9 |
def test_hello_world(tmpdir: TmpDir) -> None:
    """Runs the hello-world snakefile in dry-run mode to ensure that it parses correctly and
    that each expected rule is scheduled exactly once.
    """
    expected_rules: Dict[str, int] = {name: 1 for name in ("all", "hello_world")}

    run_snakemake(pipeline="hello-world", workdir=tmpdir, rules=expected_rules)
21 |
--------------------------------------------------------------------------------
/src/python/pyclient/tests/util.py:
--------------------------------------------------------------------------------
1 | """Tests for pipelines within :module:`~pyclient`
2 |
3 | The tests briefly test the Snakefiles to ensure they are runnable and generally execute the
4 | expected rules. They are far from comprehensive, as they do not verify the analytical results
5 | of each pipeline, which should be done elsewhere.
6 | """
7 |
8 |
9 | from collections import defaultdict
10 | from pathlib import Path
11 | from typing import Any
12 | from typing import Callable
13 | from typing import Dict
14 | from typing import List
15 | from typing import Optional
16 |
17 | import snakemake
18 |
19 |
class SnakemakeLogger(object):
    """Provides a log handler for snakemake and tracks how often each rule was reported.

    Attributes:
        rule_count: a mapping of rule name to the number of jobs reported for that rule
    """

    def __init__(self) -> None:
        self.rule_count: Dict[str, int] = defaultdict(int)

    def log_handler(self) -> Callable[[Dict[str, Any]], None]:
        """Returns a log handler for use with snakemake."""

        def handle(record: Dict[str, Any]) -> None:
            # Only the "run_info" message carries the per-rule job counts
            if record["level"] == "run_info":
                # NB: skip the first two and last lines
                for raw_line in record["msg"].split("\n")[2:-1]:
                    entry = raw_line.strip()
                    count, job = entry.split("\t")
                    num_jobs = int(count)
                    assert num_jobs > 0, entry

                    self.rule_count[job] += num_jobs

        return handle
41 |
42 |
def run_snakemake(
    pipeline: str,
    workdir: Path,
    rules: Dict[str, int],
    config: Optional[Dict[str, Any]] = None,
    configfiles: Optional[List[Path]] = None,
    quiet: bool = True,
) -> SnakemakeLogger:
    """Runs a pipeline's snakefile with Snakemake in dry-run mode and verifies the rules run.

    Args:
        pipeline: the name of the pipeline; the snakefile is found by replacing "-" with "_"
                  and appending ".smk", within the "snakemake" directory four levels above
                  this file
        workdir: the working directory in which to run Snakemake
        rules: a mapping of rule name to expect # of times it should run
        config: the optional configuration object for Snakemake
        configfiles: the optional list of configuration files for Snakemake
        quiet: tells snakemake to not output logging; set to False when debugging failing
               pipelines
    Returns:
        the logger that tracked how many times each rule was reported
    """
    # Walk up from <src>/python/pyclient/tests/util.py to <src>
    src_dir: Path = Path(__file__).absolute()
    for _ in range(4):
        src_dir = src_dir.parent
    snakefile: Path = src_dir / "snakemake" / (pipeline.replace("-", "_") + ".smk")
    assert snakefile.is_file(), f"{snakefile} is not a file"

    # run it
    tracker = SnakemakeLogger()
    succeeded = snakemake.snakemake(
        snakefile=str(snakefile),
        config=config,
        configfiles=configfiles,
        resources={"mem_gb": 8},
        workdir=str(workdir),
        dryrun=True,
        quiet=quiet,
        log_handler=[tracker.log_handler()],
        ignore_ambiguity=True,
    )
    assert succeeded

    # check the "all" rule
    all_count = tracker.rule_count["all"]
    assert all_count == 1, f"All rule was not run once, found: {tracker.rule_count['all']}"

    # check that the executed rules were run the correct # of times
    for rule, count in tracker.rule_count.items():
        assert rule in rules, f"Could not find {rule} in {rules}"
        assert count == rules[rule], f"{rule}: {rules[rule]}"

    # check that all the expected rules were run
    for rule in rules:
        assert rule in tracker.rule_count

    return tracker
95 |
--------------------------------------------------------------------------------
/src/python/pyclient/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fulcrumgenomics/python-snakemake-template/68424fbd0dc135e2b2e3fedeceed2b13104d2a16/src/python/pyclient/tools/__init__.py
--------------------------------------------------------------------------------
/src/python/pyclient/tools/__main__.py:
--------------------------------------------------------------------------------
1 | """Main entry point for all pyclient tools."""
2 |
import logging
import sys
from typing import Callable
from typing import List
from typing import Optional

import defopt

from pyclient.tools.hello_world import hello_world
11 |
# The tools exposed as sub-commands, sorted by name
TOOLS: List[Callable] = sorted([hello_world], key=lambda f: f.__name__)


def main(argv: Optional[List[str]] = None) -> None:
    """Runs the tool selected on the command line.

    Logs the command being run (unless help was requested or no arguments were given),
    dispatches to the tool with `defopt`, and logs whether the tool completed or failed.

    Args:
        argv: the command line arguments without the program name; when None, the
              arguments are read from `sys.argv` at call time
    Raises:
        Exception: any exception raised by the tool is logged and then re-raised
    """
    # NB: resolved here rather than in the signature, where it would be evaluated only once
    # at import time.
    if argv is None:
        argv = sys.argv[1:]

    logger = logging.getLogger(__name__)
    if len(argv) != 0 and all(arg not in argv for arg in ["-h", "--help"]):
        logger.info("Running command: client-tools " + " ".join(argv))
    try:
        defopt.run(funcs=TOOLS, argv=argv)
        logger.info("Completed successfully.")
    except Exception:
        logger.error("Failed on command: " + " ".join(argv))
        raise


# Allows the toolkit to also be run with `python -m pyclient.tools`
if __name__ == "__main__":
    main()
27 |
--------------------------------------------------------------------------------
/src/python/pyclient/tools/hello_world.py:
--------------------------------------------------------------------------------
def hello_world(*, message: str = "Hello World!") -> None:
    """Prints a message to standard output.

    Args:
        message: the message to print
    """
    print(message)
3 |
--------------------------------------------------------------------------------
/src/scripts/common.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Simplified logging function
# Writes a timestamped message to stderr; backslash escapes in the message are
# interpreted (printf %b).
function log() {
    local timestamp
    timestamp="$(date +"%F %T %Z")"
    printf "[%s] %b\n" "$timestamp" "$*" >&2
}
7 |
8 | # Determine how many cores to use by default
# Prints the number of cores to use by default.
#
# Uses `cgget` (cpu.shares / 1024) when it is available and yields a value,
# otherwise counts the processors in /proc/cpuinfo, otherwise asks sysctl.
# Logs an error and exits with status 1 if no positive integer can be found.
function find_core_limit() {
    local cores=""
    if hash cgget 2>/dev/null; then
        cores=$(cgget -n --values-only --variable cpu.shares / | awk '{print int($1 / 1024.0)}')
    fi
    # Fall back to the host's CPU count when cgget is missing or printed nothing
    # (e.g. when the cpu.shares query fails).
    if [[ -z "$cores" ]]; then
        if [[ -e /proc/cpuinfo ]]; then
            cores=$(grep -c ^processor /proc/cpuinfo)
        else
            cores=$(sysctl -n hw.ncpu)
        fi
    fi
    if ! [[ "$cores" =~ ^[0-9]+$ ]] || [[ "$cores" -le 0 ]]; then
        log "Cores must be > 0: $cores"
        exit 1
    fi
    echo "$cores"
}
23 |
24 | # Attempt to retrieve the amount of memory available for the pipeline. If running in a
25 | # docker container, then try to use `cgroups` to get the memory limit. This fails if `--memory`
26 | # is not set when running the docker container, so fall-back on the system resources otherwise.
27 | # AWS should always be setting `--memory`, so this is only an issue when running manually.
# Prints the amount of memory (in whole gigabytes) available for the pipeline.
#
# Collects a candidate from each source that is available (cgroups via `cgget`,
# /proc/meminfo, and sysctl on macOS) and prints the smallest one. Logs an
# error and exits with status 1 if no candidate is found or it is not > 0.
function find_mem_limit_gb() {
    local -a mem_limits=()
    local value lim mem_gb

    if hash cgget 2>/dev/null; then
        # Source: https://stackoverflow.com/questions/42187085/check-mem-limit-within-a-docker-container
        # NB: convert from bytes to gigabytes
        # NB: Will correctly find the limit when set by `--memory` in a container. All other times returns a very large
        # NB: printf keeps very large values out of scientific notation
        value=$(cgget -n --values-only --variable memory.limit_in_bytes / | awk '{printf "%.0f\n", int($1 / 1073741824.0)}')
        # Ignore an empty result so that it cannot poison the minimum below
        if [[ -n "$value" ]]; then
            mem_limits+=("$value")
        fi
    fi
    if [[ -e /proc/meminfo ]]; then
        # NB: ignores `--memory` flag passed to a docker container
        # NB: convert from kilobytes to gigabytes
        value=$(grep ^MemTotal /proc/meminfo | awk '{print int($2 / 1000000.0)}')
        if [[ -n "$value" ]]; then
            mem_limits+=("$value")
        fi
    fi
    if hash sysctl 2>/dev/null && [[ "$OSTYPE" == "darwin"* ]]; then
        # NB: convert from bytes to gigabytes
        value=$(sysctl -n hw.memsize | awk '{print int($1 / 1000000000.0)}')
        if [[ -n "$value" ]]; then
            mem_limits+=("$value")
        fi
    fi

    # Check that we have at least one value
    if [[ "${#mem_limits[@]}" -le 0 ]]; then
        log "Could not determine available RAM."
        exit 1
    fi

    # Find min value
    mem_gb="${mem_limits[0]}"
    for lim in "${mem_limits[@]}"; do
        if (( lim < mem_gb )); then
            mem_gb=$lim
        fi
    done

    if [[ "$mem_gb" -le "0" ]]; then
        log "Memory must be > 0: $mem_gb"
        exit 1
    fi
    echo "$mem_gb"
}
64 |
--------------------------------------------------------------------------------
/src/scripts/run_snakemake.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
# Prints the usage message, followed by an optional error message, to stderr
# and exits with status 1.
#
# Args:
#   $1: an optional error message to print after the usage
usage() {
    local err=${1:-""};
    cat <<EOF >&2
Usage: $0 -s snakefile -o output_directory [-c config_file] [-n]

Required:
    -s FILE   The Snakemake file to execute
    -o DIR    The directory in which to run Snakemake

Optional:
    -c FILE   The Snakemake configuration file
    -n        Run Snakemake in dry-run mode
EOF
    echo -e "\n$err" >&2;
    exit 1;
}
20 |
dry_run=""

# Parse the command line: -s snakefile, -o output directory, -c config file, -n dry run
while getopts "s:o:c:n" flag; do
    case "${flag}" in
        s) snakefile=${OPTARG};;
        o) out_dir=${OPTARG};;
        c) config_file=${OPTARG};;
        n) dry_run="-n";;
        *) usage;;
    esac
done
shift $((OPTIND-1))

# Optional arguments are collected in an array so that values containing
# whitespace are passed to snakemake intact.
extra_args=()
if [ -z "${snakefile:-}" ]; then
    usage "Missing required parameter -s";
fi
if [ -z "${out_dir:-}" ]; then
    usage "Missing required parameter -o";
fi
if [ -n "${config_file:-}" ]; then
    extra_args+=(--configfile "$config_file")
fi
if [ -n "$dry_run" ]; then
    extra_args+=("$dry_run")
fi

source "$(dirname "$0")/common.sh"
# NB: an `exit` inside these functions only leaves the command substitution, so
# the failure has to be propagated explicitly.
cores=$(find_core_limit) || exit 1
mem_gb=$(find_mem_limit_gb) || exit 1
log "Number of cores: $cores"
log "Memory limit: $mem_gb GB"

# Run Snakemake pipeline
set -euo pipefail
# NB: the ${array[@]+...} form keeps `set -u` from rejecting an empty array on
# older versions of bash.
snakemake \
    --printshellcmds \
    --reason \
    --nocolor \
    --keep-going \
    --rerun-incomplete \
    --jobs "$cores" \
    --resources "mem_gb=$mem_gb" \
    --snakefile "$snakefile" \
    --directory "$out_dir" \
    ${extra_args[@]+"${extra_args[@]}"}


log "All done!"
69 |
--------------------------------------------------------------------------------
/src/snakemake/hello_world.smk:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Hello World pipeline
3 | ################################################################################
4 |
5 | from pathlib import Path
6 | from typing import List
7 |
8 | from pyclient.pipeline import snakemake_utils
9 |
10 |
11 | ################################################################################
12 | # Utility methods and variables
13 | ################################################################################
14 |
15 | # TODO
16 |
17 | ################################################################################
18 | # Terminal files
19 | ################################################################################
20 |
21 | all_terminal_files: List[Path] = [Path("message.txt")]
22 |
23 | ################################################################################
24 | # Snakemake rules
25 | ################################################################################
26 |
onerror:
    """Block of code that gets called if the snakemake pipeline exits with an error."""
    # NB: `config` is the configuration dictionary that Snakemake makes available to the
    # snakefile; no `_config` variable is defined in this file.
    snakemake_utils.on_error(snakefile=Path(__file__), config=config, log=Path(log))
30 |
31 |
# Target rule that requests every file listed in `all_terminal_files`.
rule all:
    input:
        all_terminal_files
35 |
# Writes "Hello World" to message.txt; anything else the command prints (stdout and
# stderr) is redirected to the rule's log file.
rule hello_world:
    output:
        txt = "message.txt"
    log:
        "logs/hello_world.log"
    benchmark:
        "benchmarks/hello_world.txt"
    shell:
        "(echo Hello World > {output.txt}) &> {log}"
45 |
--------------------------------------------------------------------------------