├── .dockerignore ├── .gitignore ├── Dockerfile ├── README.md ├── Snakefile ├── config ├── config.yaml └── isolate_list.tsv ├── data ├── .gitignore └── test │ ├── .gitignore │ └── get_test_data.sh ├── docker ├── .gitignore ├── Dockerfile ├── Dockerfile-step-0 ├── Dockerfile-step-1 ├── Dockerfile-step-2 ├── README.md ├── build-containers.sh ├── run-final.sh ├── run-step-0.sh ├── run-step-1.sh ├── run-step-2.sh └── test-final.sh ├── envs ├── abricate.yaml ├── amrfinderplus.yaml ├── amrplusplus.yaml ├── ariba.yaml ├── csstar.yaml ├── deeparg.yaml ├── groot.yaml ├── hamronization.yaml ├── hamronization_workflow.yaml ├── kmerresistance.yaml ├── mykrobe.yaml ├── resfams.yaml ├── resfinder.yaml ├── rgi.yaml ├── srax.yaml ├── srst2.yaml └── staramr.yaml ├── rules ├── abricate.smk ├── amrfinderplus.smk ├── amrplusplus.smk ├── ariba.smk ├── csstar.smk ├── deeparg.smk ├── groot.smk ├── kmerresistance.smk ├── mykrobe.smk ├── resfams.smk ├── resfinder.smk ├── rgi.smk ├── rgi_bwt.smk ├── srax.smk ├── srst2.smk └── staramr.smk ├── run_test.sh └── test ├── get_large_test_data.sh ├── simple ├── test_R1.fq.gz ├── test_R2.fq.gz ├── test_contig.fna └── test_contig.fna.log ├── test_config.yaml └── test_data.tsv /.dockerignore: -------------------------------------------------------------------------------- 1 | /* 2 | !/envs 3 | !/rules 4 | !/Snakefile 5 | !/test 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea* 2 | /.snakemake 3 | /.theano 4 | /bindeps 5 | /logs 6 | /results 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | docker/Dockerfile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hAMRonization workflow 2 | 3 | ## Description 4 | 5 | hAMRonization is a project that aims to harmonize the output file formats of antimicrobial resistance (AMR) detection tools. 6 | This is a workflow that acts as a proof-of-concept test case for the [hAMRonization](https://github.com/pha4ge/hAMRonization) parsers. 7 | 8 | Specifically, this runs a set of AMR gene detection tools against a set of contigs/reads and uses `hAMRonization` to collate the results in a single unified report. 9 | 10 | The following tools are currently included: 11 | * abricate 12 | * AMRFinderPlus 13 | * ariba 14 | * Groot 15 | * RGI (for complete and draft genomes) 16 | * RGI BWT (for metagenomes) 17 | * staramr 18 | * resfams 20 | * Resfinder (including PointFinder) 21 | * sraX 22 | * DeepARG 23 | * CSSTAR 24 | * AMRplusplus 25 | * SRST2 26 | * KmerResistance 27 | 28 | Excluded tools: 29 | * mykrobe (needs variant specification to be parseable) 30 | * SEAR, ARG-ANNOT (no longer downloadable) 31 | * RAST/PATRIC (not easily runnable on CLI) 32 | * Single-organism or single-resistance tools (e.g. Kleborate, LREfinder, SSCmec Finder, U-CARE, ARGO) 33 | * shortBRED, ARGS-OAP (rely on usearch, which isn't open source) 34 | 35 | ## Installation 36 | 37 | Installation from source requires Conda or Miniconda to be installed. 38 | 39 | > Note: if you have Docker or Podman, then the pre-built container (see below) may be the easier way to go.
40 | 41 | Install prerequisites for building this pipeline (on Ubuntu): 42 | 43 | sudo apt install build-essential git zlib1g-dev curl wget file unzip jq 44 | 45 | Clone this repository: 46 | 47 | git clone https://github.com/pha4ge/hAMRonization_workflow 48 | 49 | Create the Conda environment: 50 | 51 | cd hAMRonization_workflow 52 | conda env create -n hamronization_workflow --file envs/hamronization_workflow.yaml 53 | 54 | Configure the environment for a more predictable outcome: 55 | 56 | conda activate hamronization_workflow 57 | conda config --env --add channels defaults --add channels bioconda --add channels conda-forge 58 | conda config --env --set channel_priority strict 59 | conda update --all 60 | 61 | Run a smoke test (note this takes a while as Snakemake pulls in all the tools and databases upon its first run): 62 | 63 | ./run_test.sh 64 | 65 | Running it again should take seconds and report "Nothing to be done". 66 | 67 | ## Running 68 | 69 | To execute the pipeline with your isolates, navigate to the cloned repository and edit or copy the provided configuration file (`config/config.yaml`) and isolate list (`config/isolate_list.tsv`). 70 | 71 | Remember to activate the Conda environment: 72 | 73 | conda activate hamronization_workflow 74 | 75 | Run the configured workflow (change the job count according to your compute capacity): 76 | 77 | snakemake --configfile config/config.yaml --use-conda --jobs 2 78 | 79 | Podman / Docker 80 | --------------- 81 | 82 | **NOTE the Docker image for the latest version of hAMRonization is not yet available for download but a build script is available in the `docker` directory.** 83 | 84 | Alternatively, the workflow can be run using a pre-built image that contains all the tools and their databases. Given the collective quirks of the bundled tools this is probably easier for most users. 85 | 86 | To get the container using `podman` (preferred) or `docker`: 87 | 88 | podman pull docker.io/finlaymaguire/hamronization_workflow:1.1.0 89 | docker pull docker.io/finlaymaguire/hamronization_workflow:1.1.0 90 | 91 | To run the workflow on your isolates, the container needs access to (1a) a workflow configuration (`config.yaml`) and (1b) an isolate list (`isolates.tsv`), (2) the actual data (FASTA/FASTQ files), and (3) a `results` directory to write its output in. (A `logs` directory in case things fail will also be helpful.) 92 | 93 | We suggest starting with this setup: 94 | 95 | * Create a new empty directory which will serve as your workspace 96 | * Inside the workspace create four directories: `config`, `inputs`, `results`, and `logs` 97 | * Copy your FASTA/FASTQ files into the `inputs` directory (possibly organised in subdirectories) 98 | * In the `config` directory create a file `isolates.tsv` (take `../test/test_data.tsv` as an example) 99 | * In `config/isolates.tsv` add a line for each isolate and (this is the important bit) _make sure their file paths start with `inputs/`_ because this is where the container will see them. 100 | * In the `config` directory create a file `config.yaml` (again take `../test/test_config.yaml` as an example) 101 | * In `config/config.yaml` change _only one setting_: `samples: "config/isolates.tsv"` (again, this is where the container will see the isolates file). 102 | 103 | You are ready to run the container.
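For illustration only (the isolate name, biosample identifier and file names below are placeholders, not files shipped with this repository), a prepared workspace could look like this:

    workspace/
    ├── config/
    │   ├── config.yaml      (copy of ../test/test_config.yaml with samples: "config/isolates.tsv")
    │   └── isolates.tsv
    ├── inputs/
    │   └── isolate1/
    │       ├── genome.fna
    │       ├── reads_R1.fq.gz
    │       └── reads_R2.fq.gz
    ├── logs/
    └── results/

with a matching `config/isolates.tsv` (columns separated by tabs) along these lines:

    species             biosample   assembly                     read1                            read2
    Escherichia coli    isolate1    inputs/isolate1/genome.fna   inputs/isolate1/reads_R1.fq.gz   inputs/isolate1/reads_R2.fq.gz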
While in the workspace directory: 104 | 105 | # Works identically for docker (just use 'docker' instead of 'podman') 106 | podman run -ti --rm --tmpfs /.cache --tmpfs /tmp --tmpfs /run \ 107 | -v $PWD/inputs:/inputs:ro -v $PWD/config:/config:ro -v $PWD/results:/results -v $PWD/logs:/logs \ 108 | finlaymaguire/hamronization_workflow:1.1.0 \ 109 | snakemake --configfile config/config.yaml --use-conda --cores 6 110 | 111 | If the workflow runs successfully, results will be in `./results`. In case of an error, check the most recent file in `./logs`. 112 | 113 | You are not bound to the above setup: you can mount any host directory in the container, at any mountpoint you like **except for the output directory, which must be mounted at `/results`**. (If you don't mount anything on `/results`, the results get written _inside_ the container.) Just remember that the file paths in your isolate list are interpreted from _within_ the container (and relative to `/`). 114 | 115 | 116 | Initial Run 117 | ----------- 118 | 119 | ### Run Data 120 | 121 | The following datasets are currently used for result file generation: 122 | ``` 123 | Organism Biosample Assembly Run 124 | Salmonella enterica SAMN13012778 GCA_009009245.1 SRR10258315 125 | Salmonella enterica SAMN13064234 GCA_009239915.1 SRR10313698 126 | Salmonella enterica SAMN10872197 GCA_007657735.1 SRR8528923 127 | Salmonella enterica SAMN13064249 GCA_009239785.1 SRR10313716 128 | Salmonella enterica SAMN07255713 GCA_009439415.1 SRR5921214 129 | Salmonella enterica SAMN03098832 GCA_006629605.1 SRR1616829 130 | Klebsiella pneumoniae SAMN02927805 GCA_004302785.1 SRR1561295 131 | Salmonella enterica SAMEA6058467 GCA_009625195.1 ERR3581801 132 | E. coli SAMN05980528 GCA_004268245.1 SRR4897319 133 | Mycobacterium tuberculosis SAMN02599008 GCA_000662585.1 SRR1182980 SRR1180160 134 | Mycobacterium tuberculosis SAMN02599179 GCA_000665745.1 SRR1172848 SRR1172873 135 | Mycobacterium tuberculosis SAMN02599095 GCA_000706105.1 SRR1173728 SRR1173217 136 | Mycobacterium tuberculosis SAMN02599061 GCA_000663625.1 SRR1175151 SRR1172938 137 | Mycobacterium tuberculosis SAMN02598983 GCA_000654735.1 SRR1174279 SRR1173257 138 | ``` 139 | Links to data and corresponding metadata need to be stored in a tab-separated sample sheet with the following columns: 140 | `species biosample assembly reads read1 read2` 141 | 142 | 143 | ### Results 144 | 145 | The results generated on the aforementioned datasets can be retrieved [here](https://databay.bfrlab.de/d/c937ce66a7f2406e9a0f/). 146 | 147 | Contact 148 | ------- 149 | Please consult the [PHA4GE project website](https://github.com/pha4ge) for questions. 150 | 151 | For technical questions, please feel free to consult: 152 | * Finlay Maguire 153 | * Simon H.
Tausch 154 | 155 | -------------------------------------------------------------------------------- /Snakefile: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import os 3 | import re 4 | shell.executable("bash") 5 | 6 | # Samples Table --- 7 | 8 | # Read the samples TSV verbatim into an all-strings dataframe 9 | # - Empty lines and comment lines are skipped 10 | # - Empty cells yield zero-length strings (no NaN or NA weirdness) 11 | # - Leniently skip spaces that start a cell value 12 | # - The 'biosample' index column must be unique (checked below) 13 | # - The 'usecols' array lists our required columns (others are ignored) 14 | 15 | samples = pd.read_table(config['samples'], index_col="biosample", 16 | dtype='str', na_filter=False, comment='#', skipinitialspace=True, 17 | usecols=['biosample', 'species', 'assembly', 'read1', 'read2']) 18 | 19 | # Sanity checks on the samples table: biosample index must be unique and non-empty 20 | if not all(map(len, samples.index)) or samples.index.size != samples.index.unique().size: 21 | raise Exception("Every sample must have a unique 'biosample' identifier in {}".format(config['samples'])) 22 | 23 | # Check that biosample has no characters that break things (like '/') 24 | pat = re.compile(r"^[\w.@:=-]+$") 25 | if not all(map(lambda id: re.match(pat, id), samples.index)): 26 | raise Exception("Biosample IDs must not contain spaces or punctuation other than: . @ : = - _") 27 | 28 | # Sample Lists --- 29 | 30 | # Some tools run on assemblies, others on read pairs. Here we define 31 | # lists of indices (= biosample IDs) of samples that have an assembly 32 | # and/or a read pair, to be used in the 'expand' definitions below. 33 | 34 | samples_with_assembly = samples[samples.assembly != ''].index 35 | samples_with_readpair = samples[samples.read2 != ''].index 36 | samples_with_either = samples[samples.assembly + samples.read2 != ''].index 37 | 38 | # Input Helpers --- 39 | 40 | # Functions to retrieve column values from the samples table 41 | # - All these use the {sample} variable in the wildcards that Snakemake passes in 42 | # - All return empty string when the value is absent so can be used as boolean tests 43 | 44 | get_species = lambda w: samples.loc[w.sample].species 45 | get_assembly = lambda w: samples.loc[w.sample].assembly 46 | get_read1 = lambda w: samples.loc[w.sample].read1 47 | get_read2 = lambda w: samples.loc[w.sample].read2 48 | 49 | # Convenience functions for tools that can take both reads and assemblies 50 | # - All functions return empty list when they find nothing, thus can be used as tests 51 | 52 | get_reads = lambda w: list(filter(None, [get_read1(w), get_read2(w)])) 53 | get_reads_or_assembly = lambda w: get_reads(w) or list(filter(None, [get_assembly(w)])) 54 | get_assembly_or_reads = lambda w: list(filter(None, [get_assembly(w)])) or get_reads(w) 55 | 56 | # Target rules --- 57 | 58 | rule all: 59 | input: 60 | "results/hamronized_report.tsv", 61 | "results/hamronized_report.json", 62 | "results/hamronized_report.html" 63 | 64 | rule summarize_all: 65 | output: 66 | tsv = "results/hamronized_report.tsv", 67 | json = "results/hamronized_report.json", 68 | html = "results/hamronized_report.html" 69 | input: 70 | expand("results/{sample}/{sample}_hamronized.tsv", sample=samples_with_either), 71 | conda: 72 | "envs/hamronization.yaml" 73 | shell: 74 | """ 75 | hamronize summarize -t tsv -o {output.tsv} {input} 76 | hamronize summarize -t json -o {output.json}
{input} 77 | hamronize summarize -t interactive -o {output.html} {input} 78 | """ 79 | 80 | rule summarize_sample: 81 | output: 82 | tsv = "results/{sample}/{sample}_hamronized.tsv", 83 | json = "results/{sample}/{sample}_hamronized.json", 84 | html = "results/{sample}/{sample}_hamronized.html" 85 | input: 86 | expand("results/{sample}/abricate/hamronized_report.tsv", sample=samples_with_assembly), 87 | expand("results/{sample}/amrfinderplus/hamronized_report.tsv", sample=samples_with_assembly), 88 | expand("results/{sample}/amrplusplus/hamronized_report.tsv", sample=samples_with_readpair), 89 | expand("results/{sample}/ariba/hamronized_report.tsv", sample=samples_with_readpair), 90 | # expand("results/{sample}/csstar/hamronized_report.tsv", sample=samples_with_assembly), 91 | expand("results/{sample}/deeparg-fna/hamronized_report.tsv", sample=samples_with_assembly), 92 | expand("results/{sample}/deeparg-fqs/hamronized_report.tsv", sample=samples_with_readpair), 93 | expand("results/{sample}/groot/hamronized_report.tsv", sample=samples_with_readpair), 94 | expand("results/{sample}/kmerresistance/hamronized_report.tsv", sample=samples_with_readpair), 95 | expand("results/{sample}/mykrobe/hamronized_report.tsv", sample=samples_with_readpair), 96 | expand("results/{sample}/resfams/hamronized_report.tsv", sample=samples_with_assembly), 97 | expand("results/{sample}/resfinder-fna/hamronized_report.tsv", sample=samples_with_assembly), 98 | expand("results/{sample}/resfinder-fqs/hamronized_report.tsv", sample=samples_with_readpair), 99 | expand("results/{sample}/rgi/hamronized_report.tsv", sample=samples_with_assembly), 100 | expand("results/{sample}/rgibwt/hamronized_report.tsv", sample=samples_with_readpair), 101 | expand("results/{sample}/srax/hamronized_report.tsv", sample=samples_with_assembly), 102 | expand("results/{sample}/staramr/hamronized_report.tsv", sample=samples_with_assembly), 103 | expand("results/{sample}/srst2/hamronized_report.tsv", sample=samples_with_readpair) 104 | conda: 105 | "envs/hamronization.yaml" 106 | shell: 107 | """ 108 | hamronize summarize -t tsv -o {output.tsv} {input} 109 | hamronize summarize -t json -o {output.json} {input} 110 | hamronize summarize -t interactive -o {output.html} {input} 111 | """ 112 | 113 | include: "rules/abricate.smk" 114 | include: "rules/amrfinderplus.smk" 115 | include: "rules/amrplusplus.smk" 116 | include: "rules/ariba.smk" 117 | #include: "rules/csstar.smk" 118 | include: "rules/deeparg.smk" 119 | include: "rules/groot.smk" 120 | include: "rules/kmerresistance.smk" 121 | include: "rules/mykrobe.smk" 122 | include: "rules/resfams.smk" 123 | include: "rules/resfinder.smk" 124 | include: "rules/rgi.smk" 125 | include: "rules/rgi_bwt.smk" 126 | include: "rules/srax.smk" 127 | include: "rules/srst2.smk" 128 | include: "rules/staramr.smk" 129 | 130 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | # Configuration file for a hAMRronization workflow run 2 | 3 | # Path or URL to the sample sheet 4 | samples: "config/isolate_list.tsv" 5 | 6 | params: 7 | db_dir: "data/dbs" # directory to install databases in 8 | dateformat: "%Y-%b-%d" # date formats for database downloads 9 | binary_dir: "bindeps" # directory to install non-conda dependencies 10 | threads: 1 #number of threads per rule 11 | abricate: 12 | name: "ncbi" 13 | minid: 75 #minimum identity threshold 14 | mincov: 0 #minimum coverage threshold 
15 | amrfinderplus: 16 | #empty, no options exposed 17 | groot: 18 | db_source: "card" #which preclustered db to use 19 | read_length: 150 # read length to use for indexing the preclustered db 20 | window: 20 # window size to allow min and max read length (min-len = read_length - window) 21 | rgi: 22 | #empty, no options exposed 23 | srax: 24 | dbtype: "basic" 25 | amrplusplus: 26 | resistome_analyzer_version: c6b097ad054e0620560f3bcd22e2a63b896ab33a 27 | snpfinder_version: 28a20e981d33f8d22814ed6f0aeba36f101a8037 28 | rarefactionanalyzer_version: de06630986c10c03b8540fd53925075baca5d38e 29 | ariba: 30 | #empty, no options exposed 31 | staramr: 32 | #empty, no options exposed 33 | resfams: 34 | #empty, no options exposed 35 | mykrobe: 36 | #empty, no options exposed 37 | resfinder: 38 | # git tags for databases used; each component has its own at https://bitbucket.org/genomicepidemiology 39 | res_db_version: "resfinder-4.6.0" 40 | point_db_version: "resfinder-4.6.0" 41 | disinf_db_version: "resfinder-4.6.0" 42 | kmerresistance: 43 | # uses same res_db as resfinder but also KmerFinder database for species detection (TODO: check needed?) 44 | # due to the large size and slow FTP of the KmerFinder db we just use a single dummy Klebsiella type genome 45 | res_db_version: "resfinder-4.6.0" 46 | csstar: 47 | db_source: "https://raw.githubusercontent.com/tomdeman-bio/Sequence-Search-Tool-for-Antimicrobial-Resistance-SSTAR-/master/Latest_AR_database/ResGANNOT_srst2.fasta" 48 | srst2: 49 | gene_db: "ARGannot.fasta" 50 | db_source: "https://raw.githubusercontent.com/katholt/srst2/master/data/ARGannot.fasta" 51 | min_depth: 5 52 | max_divergence: 10 53 | forward: "_R1" 54 | reverse: "_R2" 55 | 56 | -------------------------------------------------------------------------------- /config/isolate_list.tsv: -------------------------------------------------------------------------------- 1 | # This TSV file specifies the list of inputs for a hAMRonization workflow run. 2 | # Its first line (excluding comment lines and empty lines) must have column headings, 3 | # and must define at least the columns listed below. Columns may be in any order. 4 | # 5 | # Every row must have a (possibly empty) value in every column. The 'biosample' 6 | # column must have a unique non-empty value in every row. If you omit assembly, 7 | # reads, or species, then tools that require these inputs will fail to run.
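#
# Purely for illustration (the biosample accession and paths below are placeholders, not
# shipped test data): an isolate with reads but no assembly simply leaves the 'assembly'
# field empty (two consecutive tabs), and an assembly-only isolate leaves 'read1' and
# 'read2' empty, e.g.:
# Escherichia coli    SAMN00000000        sample_folder/R1.fq.gz    sample_folder/R2.fq.gz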
8 | 9 | species biosample assembly read1 read2 10 | Salmonella enterica SAMN13012778 sample_folder/genome.fna sample_folder/R1.fq.gz sample_folder/R2.fq.gz 11 | 12 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | /* 2 | !/.gitignore 3 | !/test 4 | -------------------------------------------------------------------------------- /data/test/.gitignore: -------------------------------------------------------------------------------- 1 | /* 2 | !/.gitignore 3 | !/get_test_data.sh 4 | -------------------------------------------------------------------------------- /data/test/get_test_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # stop on errors 3 | set -o errexit 4 | 5 | wget -O SAMN02599008.tar.gz https://osf.io/6ma8p/download 6 | tar xvf SAMN02599008.tar.gz 7 | 8 | wget -O SAMEA6634591.tar.gz https://osf.io/4tqxc/download 9 | tar xvf SAMEA6634591.tar.gz 10 | -------------------------------------------------------------------------------- /docker/.gitignore: -------------------------------------------------------------------------------- 1 | /build-patch.sh 2 | /Dockerfile.patch 3 | /run-patch.sh 4 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Base image 2 | FROM docker.io/continuumio/miniconda3 3 | 4 | # metadata 5 | LABEL base.image="miniconda3" 6 | LABEL version="1" 7 | LABEL software="hAMRonization" 8 | LABEL software.version="1.0.0" 9 | LABEL description="Workflow for running many AMR tools on a set of reads/contigs" 10 | LABEL website="https://github.com/pha4ge/hamronization" 11 | LABEL documentation="https://github.com/pha4ge/hamronization_workflow" 12 | LABEL license="https://github.com/pha4ge/hAMRonization/blob/master/LICENSE.txt" 13 | LABEL tags="Genomics" 14 | 15 | # maintainer 16 | MAINTAINER Finlay Maguire 17 | 18 | # install system requirements 19 | RUN apt-get -qq update --fix-missing && apt-get -qq install apt-utils && dpkg --configure -a && \ 20 | apt-get -qq install --no-install-recommends git build-essential curl wget unzip bzip2 gnupg zlib1g-dev file jq vim \ 21 | && apt-get -qq clean && rm -rf /var/lib/apt/lists/* 22 | 23 | # stop container's bash from leaving .bash_histories everywhere and add convenience aliases for interactive use 24 | RUN echo "unset HISTFILE" >>/etc/bash.bashrc && \ 25 | echo "alias ls='ls --color=auto' l='ls -CF' la='l -a' ll='l -l' lla='ll -a'" >>/etc/bash.bashrc 26 | 27 | # system-wide channels probably best off with the usual trio and strict priority for predictability 28 | RUN conda config --system --add channels defaults --add channels bioconda --add channels conda-forge && \ 29 | conda config --system --set channel_priority strict && \ 30 | conda update -n base --all 31 | 32 | # install and run in root (weird but keeps user mounts simplest) 33 | WORKDIR / 34 | 35 | # install the setup (note the .dockerignore file!) 36 | COPY envs envs 37 | COPY rules rules 38 | COPY test test 39 | COPY Snakefile . 
40 | 41 | # install snakemake straight in base, so no activate needed from user (shouldn't break conda) 42 | RUN conda env update -n base -f envs/hamronization_workflow.yaml 43 | 44 | # The following three steps could all be done in a single compound RUN command, but as especially 45 | # steps 2 and 3 take ages, having Docker commits in between them makes debugging easy: comment 46 | # out the subsequent steps and rebuild up to the last successful commit, then enter that image. 47 | 48 | # make Snakemake install all conda environments (but not the non-conda binary deps or databases) 49 | RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 --conda-create-envs-only 50 | 51 | # make Snakemake install the the non-conda binary deps (but not the databases) 52 | RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 bindeps/resistome bindeps/rarefaction bindeps/snpfinder 53 | 54 | # this maiden run pulls all databases in, thus finalising the self-contained image 55 | RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 results/hamronized_report.tsv && \ 56 | rm -rf results 57 | -------------------------------------------------------------------------------- /docker/Dockerfile-step-0: -------------------------------------------------------------------------------- 1 | # Base image 2 | FROM docker.io/continuumio/miniconda3 3 | 4 | # metadata 5 | LABEL base.image="miniconda3" 6 | LABEL version="1" 7 | LABEL software="hAMRonization" 8 | LABEL software.version="1.0.0" 9 | LABEL description="Workflow for running many AMR tools on a set of reads/contigs" 10 | LABEL website="https://github.com/pha4ge/hamronization" 11 | LABEL documentation="https://github.com/pha4ge/hamronization_workflow" 12 | LABEL license="https://github.com/pha4ge/hAMRonization/blob/master/LICENSE.txt" 13 | LABEL tags="Genomics" 14 | 15 | # maintainer 16 | MAINTAINER Finlay Maguire 17 | 18 | # install system requirements 19 | RUN apt-get -qq update --fix-missing && apt-get -qq install apt-utils && dpkg --configure -a && \ 20 | apt-get -qq install --no-install-recommends git build-essential curl wget unzip bzip2 gnupg zlib1g-dev file jq vim \ 21 | && apt-get -qq clean && rm -rf /var/lib/apt/lists/* 22 | 23 | # stop container's bash from leaving .bash_histories everywhere and add convenience aliases for interactive use 24 | RUN echo "unset HISTFILE" >>/etc/bash.bashrc && \ 25 | echo "alias ls='ls --color=auto' l='ls -CF' la='l -a' ll='l -l' lla='ll -a'" >>/etc/bash.bashrc 26 | 27 | # system-wide channels probably best off with the usual trio and strict priority for predictability 28 | RUN conda config --system --add channels defaults --add channels bioconda --add channels conda-forge && \ 29 | conda config --system --set channel_priority strict && \ 30 | conda update -n base --all 31 | 32 | # install and run in root (weird but keeps user mounts simplest) 33 | WORKDIR / 34 | 35 | # install the setup (note the .dockerignore file!) 36 | COPY envs envs 37 | COPY rules rules 38 | COPY test test 39 | COPY Snakefile . 
40 | 41 | # install snakemake straight in base, so no activate needed from user (shouldn't break conda) 42 | RUN conda env update -n base -f envs/hamronization_workflow.yaml 43 | 44 | # The following three steps could all be done in a single compound RUN command, but as especially 45 | # steps 2 and 3 take ages, having Docker commits in between them makes debugging easy: comment 46 | # out the subsequent steps and rebuild up to the last successful commit, then enter that image. 47 | 48 | # make Snakemake install all conda environments (but not the non-conda binary deps or databases) 49 | #RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 --conda-create-envs-only 50 | 51 | # make Snakemake install the the non-conda binary deps (but not the databases) 52 | #RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 bindeps/resistome bindeps/rarefaction bindeps/snpfinder 53 | 54 | # this maiden run pulls all databases in, thus finalising the self-contained image 55 | #RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 results/hamronized_report.tsv && \ 56 | # rm -rf results 57 | -------------------------------------------------------------------------------- /docker/Dockerfile-step-1: -------------------------------------------------------------------------------- 1 | # Base image 2 | FROM docker.io/continuumio/miniconda3 3 | 4 | # metadata 5 | LABEL base.image="miniconda3" 6 | LABEL version="1" 7 | LABEL software="hAMRonization" 8 | LABEL software.version="1.0.0" 9 | LABEL description="Workflow for running many AMR tools on a set of reads/contigs" 10 | LABEL website="https://github.com/pha4ge/hamronization" 11 | LABEL documentation="https://github.com/pha4ge/hamronization_workflow" 12 | LABEL license="https://github.com/pha4ge/hAMRonization/blob/master/LICENSE.txt" 13 | LABEL tags="Genomics" 14 | 15 | # maintainer 16 | MAINTAINER Finlay Maguire 17 | 18 | # install system requirements 19 | RUN apt-get -qq update --fix-missing && apt-get -qq install apt-utils && dpkg --configure -a && \ 20 | apt-get -qq install --no-install-recommends git build-essential curl wget unzip bzip2 gnupg zlib1g-dev file jq vim \ 21 | && apt-get -qq clean && rm -rf /var/lib/apt/lists/* 22 | 23 | # stop container's bash from leaving .bash_histories everywhere and add convenience aliases for interactive use 24 | RUN echo "unset HISTFILE" >>/etc/bash.bashrc && \ 25 | echo "alias ls='ls --color=auto' l='ls -CF' la='l -a' ll='l -l' lla='ll -a'" >>/etc/bash.bashrc 26 | 27 | # system-wide channels probably best off with the usual trio and strict priority for predictability 28 | RUN conda config --system --add channels defaults --add channels bioconda --add channels conda-forge && \ 29 | conda config --system --set channel_priority strict && \ 30 | conda update -n base --all 31 | 32 | # install and run in root (weird but keeps user mounts simplest) 33 | WORKDIR / 34 | 35 | # install the setup (note the .dockerignore file!) 36 | COPY envs envs 37 | COPY rules rules 38 | COPY test test 39 | COPY Snakefile . 
40 | 41 | # install snakemake straight in base, so no activate needed from user (shouldn't break conda) 42 | RUN conda env update -n base -f envs/hamronization_workflow.yaml 43 | 44 | # The following three steps could all be done in a single compound RUN command, but as especially 45 | # steps 2 and 3 take ages, having Docker commits in between them makes debugging easy: comment 46 | # out the subsequent steps and rebuild up to the last successful commit, then enter that image. 47 | 48 | # make Snakemake install all conda environments (but not the non-conda binary deps or databases) 49 | RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 --conda-create-envs-only 50 | 51 | # make Snakemake install the the non-conda binary deps (but not the databases) 52 | #RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 bindeps/resistome bindeps/rarefaction bindeps/snpfinder 53 | 54 | # this maiden run pulls all databases in, thus finalising the self-contained image 55 | #RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 results/hamronized_report.tsv && \ 56 | # rm -rf results 57 | -------------------------------------------------------------------------------- /docker/Dockerfile-step-2: -------------------------------------------------------------------------------- 1 | # Base image 2 | FROM docker.io/continuumio/miniconda3 3 | 4 | # metadata 5 | LABEL base.image="miniconda3" 6 | LABEL version="1" 7 | LABEL software="hAMRonization" 8 | LABEL software.version="1.0.0" 9 | LABEL description="Workflow for running many AMR tools on a set of reads/contigs" 10 | LABEL website="https://github.com/pha4ge/hamronization" 11 | LABEL documentation="https://github.com/pha4ge/hamronization_workflow" 12 | LABEL license="https://github.com/pha4ge/hAMRonization/blob/master/LICENSE.txt" 13 | LABEL tags="Genomics" 14 | 15 | # maintainer 16 | MAINTAINER Finlay Maguire 17 | 18 | # install system requirements 19 | RUN apt-get -qq update --fix-missing && apt-get -qq install apt-utils && dpkg --configure -a && \ 20 | apt-get -qq install --no-install-recommends git build-essential curl wget unzip bzip2 gnupg zlib1g-dev file jq vim \ 21 | && apt-get -qq clean && rm -rf /var/lib/apt/lists/* 22 | 23 | # stop container's bash from leaving .bash_histories everywhere and add convenience aliases for interactive use 24 | RUN echo "unset HISTFILE" >>/etc/bash.bashrc && \ 25 | echo "alias ls='ls --color=auto' l='ls -CF' la='l -a' ll='l -l' lla='ll -a'" >>/etc/bash.bashrc 26 | 27 | # system-wide channels probably best off with the usual trio and strict priority for predictability 28 | RUN conda config --system --add channels defaults --add channels bioconda --add channels conda-forge && \ 29 | conda config --system --set channel_priority strict && \ 30 | conda update -n base --all 31 | 32 | # install and run in root (weird but keeps user mounts simplest) 33 | WORKDIR / 34 | 35 | # install the setup (note the .dockerignore file!) 36 | COPY envs envs 37 | COPY rules rules 38 | COPY test test 39 | COPY Snakefile . 
40 | 41 | # install snakemake straight in base, so no activate needed from user (shouldn't break conda) 42 | RUN conda env update -n base -f envs/hamronization_workflow.yaml 43 | 44 | # The following three steps could all be done in a single compound RUN command, but as especially 45 | # steps 2 and 3 take ages, having Docker commits in between them makes debugging easy: comment 46 | # out the subsequent steps and rebuild up to the last successful commit, then enter that image. 47 | 48 | # make Snakemake install all conda environments (but not the non-conda binary deps or databases) 49 | RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 --conda-create-envs-only 50 | 51 | # make Snakemake install the the non-conda binary deps (but not the databases) 52 | RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 bindeps/resistome bindeps/rarefaction bindeps/snpfinder 53 | 54 | # this maiden run pulls all databases in, thus finalising the self-contained image 55 | #RUN snakemake --configfile test/test_config.yaml --use-conda --conda-cleanup-pkgs cache --jobs 1 results/hamronized_report.tsv && \ 56 | # rm -rf results 57 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | 3 | This directory has scripts to (locally) build and test the Docker/Podman container. 4 | 5 | ## Building 6 | 7 | The `build-containers.sh` script successively builds 4 images, using `podman` 8 | if installed, else `docker`. 9 | 10 | * step 0: base Conda with only Snakemake added (= `envs/hamronization_workflow.yaml`) 11 | * step 1: step 0 with on top all tools, installed by Snakemake (= `envs/*.yaml`) 12 | * step 2: step 1 with on top the binary deps (minor step) 13 | * step 3: step 2 with on top the databases (massive final step) 14 | 15 | The images all have the same Dockerfile, except that an additional RUN step 16 | is added in each. This so that if the build fails at some step, we have the 17 | successful image from the prior step for debugging the failing step. 18 | 19 | > We could have also used Docker's `FROM ...` to do this, but the idea is that 20 | > once the build is stable, we can ditch the steps and just have `Dockerfile` 21 | > (i.e. the final image). 22 | > 23 | > OTOH, as long as the Dockerfiles are identical up to the penultimate step, 24 | > they share all their image layers, so none of the earlier steps consume any 25 | > disc space or build time. 26 | 27 | ## Running 28 | 29 | The `run-*.sh` scripts are convenience wrappers for `podman|docker run ...`, 30 | with the necessary mounts set up. 31 | 32 | ## Testing 33 | 34 | The `test-final.sh` script runs the final container against the test data 35 | in `../test`, writing results and logs to a temporary directory in `/tmp`. 36 | -------------------------------------------------------------------------------- /docker/build-containers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LC_ALL="C" 4 | set -euo pipefail 5 | cd "$(dirname "$0")" 6 | 7 | # Prefer podman over docker for rootlessness 8 | podman --version >/dev/null 2>&1 && CMD=podman || CMD=docker 9 | 10 | # Note the .. at the end, we build the directory above us (which has a .dockerignore) 11 | $CMD build -f Dockerfile-step-0 -t localhost/hamronization_workflow-step-0 .. 
&& 12 | $CMD build -f Dockerfile-step-1 -t localhost/hamronization_workflow-step-1 .. && 13 | $CMD build -f Dockerfile-step-2 -t localhost/hamronization_workflow-step-2 .. && 14 | $CMD build -f Dockerfile -t localhost/hamronization_workflow .. 15 | -------------------------------------------------------------------------------- /docker/run-final.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LC_ALL="C" 4 | set -euo pipefail 5 | cd "$(dirname "$0")" 6 | 7 | # would use -u "$(id -u):$(id -g)" but the container can't be readonly and files inside are root-owned 8 | exec docker run -it --rm --tmpfs /.cache --tmpfs /run --tmpfs /tmp -v "$PWD/inputs:/inputs:ro" -v "$PWD/results:/results" 'localhost/hamronization_workflow' "${@:-bash}" 9 | -------------------------------------------------------------------------------- /docker/run-step-0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LC_ALL="C" 4 | set -euo pipefail 5 | cd "$(dirname "$0")" 6 | 7 | podman --version >/dev/null 2>&1 && CMD=podman || CMD=docker 8 | exec $CMD run -it --rm --tmpfs /.cache --tmpfs /run --tmpfs /tmp 'localhost/hamronization_workflow-step-0' "${@:-bash}" 9 | -------------------------------------------------------------------------------- /docker/run-step-1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LC_ALL="C" 4 | set -euo pipefail 5 | cd "$(dirname "$0")" 6 | 7 | podman --version >/dev/null 2>&1 && CMD=podman || CMD=docker 8 | exec $CMD run -it --rm --tmpfs /.cache --tmpfs /run --tmpfs /tmp -v "$PWD/../data:/data:ro" -v "$PWD/inputs:/inputs:ro" -v "$PWD/results:/results" 'localhost/hamronization_workflow-step-1' "${@:-bash}" 9 | -------------------------------------------------------------------------------- /docker/run-step-2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LC_ALL="C" 4 | set -euo pipefail 5 | cd "$(dirname "$0")" 6 | 7 | podman --version >/dev/null 2>&1 && CMD=podman || CMD=docker 8 | exec $CMD run -it --rm --tmpfs /.cache --tmpfs /run --tmpfs /tmp -v "$PWD/../data:/data:ro" -v "$PWD/inputs:/inputs:ro" -v "$PWD/results:/results" 'localhost/hamronization_workflow-step-2' "${@:-bash}" 9 | -------------------------------------------------------------------------------- /docker/test-final.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run the final container with the ../test/test_config.yaml, write results and logs to /tmp 4 | 5 | export LC_ALL="C" 6 | set -euo pipefail 7 | 8 | # Full path to the 'test' directory that has config and isolates 9 | TEST_DIR="$(realpath "$(dirname "$0")/../test")" 10 | 11 | # Use a system mktemp directory for the outputs 12 | TMP_DIR="$(mktemp -d)" 13 | mkdir -p "$TMP_DIR/logs" "$TMP_DIR/results" 14 | 15 | # Would rather use -u "$(id -u):$(id -g)" to run the container as the invoking user, but it can't be readonly 16 | # (snakemake chokes on that), and the files inside are root-owned, so our output will be root-owned unless 17 | # the Docker daemon was set up with user namespaces. Time to move on to Podman or Singularity. 
18 | 19 | docker run -it --rm --tmpfs /.cache --tmpfs /run --tmpfs /tmp \ 20 | -v "$TEST_DIR:/test:ro" -v "$TMP_DIR/results:/results" -v "$TMP_DIR/logs:/logs" \ 21 | 'localhost/hamronization_workflow' \ 22 | snakemake --configfile 'test/test_config.yaml' --use-conda --cores $(nproc) || true 23 | 24 | printf ' 25 | -------------------------------- 26 | Container test outputs are here: 27 | - logs: %s/logs 28 | - results: %s/results 29 | -------------------------------- 30 | ' "$TMP_DIR" "$TMP_DIR" 31 | -------------------------------------------------------------------------------- /envs/abricate.yaml: -------------------------------------------------------------------------------- 1 | name: abricate 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - abricate=1.0.1 8 | -------------------------------------------------------------------------------- /envs/amrfinderplus.yaml: -------------------------------------------------------------------------------- 1 | name: amrfinderplus 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - ncbi-amrfinderplus=4.0.3 8 | -------------------------------------------------------------------------------- /envs/amrplusplus.yaml: -------------------------------------------------------------------------------- 1 | name: amrplusplus 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - trimmomatic=0.39 8 | - samtools=1.11 9 | - bwa=0.7.17 10 | -------------------------------------------------------------------------------- /envs/ariba.yaml: -------------------------------------------------------------------------------- 1 | name: ariba 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - ariba=2.14.6 8 | -------------------------------------------------------------------------------- /envs/csstar.yaml: -------------------------------------------------------------------------------- 1 | name: csstar 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - biopython 8 | - nomkl 9 | - python<3.0 10 | - blast 11 | -------------------------------------------------------------------------------- /envs/deeparg.yaml: -------------------------------------------------------------------------------- 1 | name: deeparg 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - deeparg=1.0.4 8 | - trimmomatic 9 | - vsearch 10 | - bedtools 11 | - bowtie2 12 | - samtools 13 | -------------------------------------------------------------------------------- /envs/groot.yaml: -------------------------------------------------------------------------------- 1 | name: groot 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - seqkit 8 | - groot=1.1.2 9 | -------------------------------------------------------------------------------- /envs/hamronization.yaml: -------------------------------------------------------------------------------- 1 | name: hamronization 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python>=3.9 6 | - pip 7 | - pip: 8 | - git+https://github.com/zwets/hamronization 9 | -------------------------------------------------------------------------------- /envs/hamronization_workflow.yaml: -------------------------------------------------------------------------------- 1 | name: hamronization_workflow 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - snakemake=8.26.0 8 | 
-------------------------------------------------------------------------------- /envs/kmerresistance.yaml: -------------------------------------------------------------------------------- 1 | name: kmerresistance 2 | channels: 3 | - fmaguire 4 | - conda-forge 5 | - bioconda 6 | - defaults 7 | dependencies: 8 | - kma=1.3.13 9 | - kmerresistance=2.2.0 10 | -------------------------------------------------------------------------------- /envs/mykrobe.yaml: -------------------------------------------------------------------------------- 1 | name: mykrobe 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - mykrobe=0.13.0 8 | -------------------------------------------------------------------------------- /envs/resfams.yaml: -------------------------------------------------------------------------------- 1 | name: resfams 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - hmmer=3.3.2 8 | - prodigal=2.6.3 9 | -------------------------------------------------------------------------------- /envs/resfinder.yaml: -------------------------------------------------------------------------------- 1 | name: resfinder 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - resfinder=4.6.0 8 | -------------------------------------------------------------------------------- /envs/rgi.yaml: -------------------------------------------------------------------------------- 1 | name: rgi 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - rgi=6.0.3 8 | -------------------------------------------------------------------------------- /envs/srax.yaml: -------------------------------------------------------------------------------- 1 | name: srax 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - srax=1.5 8 | -------------------------------------------------------------------------------- /envs/srst2.yaml: -------------------------------------------------------------------------------- 1 | name: srst2 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - srst2=0.2.0 8 | - samtools=0.1.18 9 | -------------------------------------------------------------------------------- /envs/staramr.yaml: -------------------------------------------------------------------------------- 1 | name: staramr 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - staramr=0.11.0 8 | # - pandas==1.2.5 9 | # - perl-list-moreutils 10 | -------------------------------------------------------------------------------- /rules/abricate.smk: -------------------------------------------------------------------------------- 1 | rule run_abricate: 2 | input: 3 | contigs = get_assembly 4 | output: 5 | report = "results/{sample}/abricate/report.tsv", 6 | metadata = "results/{sample}/abricate/metadata.txt" 7 | message: "Running rule run_abricate on {wildcards.sample} with contigs" 8 | log: 9 | "logs/abricate_{sample}.log" 10 | conda: 11 | "../envs/abricate.yaml" 12 | threads: 13 | config["params"]["threads"] 14 | params: 15 | dbname = config["params"]["abricate"]["name"], #"ncbi", 16 | minid = config["params"]["abricate"]["minid"], 17 | mincov = config["params"]["abricate"]["mincov"] 18 | shell: 19 | """ 20 | abricate --threads {threads} --nopath --db {params.dbname} --minid {params.minid} --mincov {params.mincov} {input.contigs} > {output.report} 2> {log} 21 | abricate --version | perl -p -e 's/abricate 
(.+)/--analysis_software_version $1/' > {output.metadata} 22 | abricate --list | grep {params.dbname} | perl -p -e 's/.+?\t.+?\t.+?\t(.+)/--reference_database_version $1/' >> {output.metadata} 23 | """ 24 | 25 | rule hamronize_abricate: 26 | input: 27 | report = "results/{sample}/abricate/report.tsv", 28 | metadata = "results/{sample}/abricate/metadata.txt", 29 | output: 30 | "results/{sample}/abricate/hamronized_report.tsv" 31 | conda: 32 | "../envs/hamronization.yaml" 33 | shell: 34 | """ 35 | hamronize abricate $(paste - - < {input.metadata}) {input.report} > {output} 36 | """ 37 | -------------------------------------------------------------------------------- /rules/amrfinderplus.smk: -------------------------------------------------------------------------------- 1 | rule get_amrfinder_db: 2 | output: 3 | directory(os.path.join(config['params']['db_dir'], "amrfinderplus", "latest")) 4 | conda: 5 | "../envs/amrfinderplus.yaml" 6 | params: 7 | db_dir = os.path.join(config['params']['db_dir'], "amrfinderplus") 8 | log: 9 | "logs/amrfinderplus_db.log" 10 | shell: 11 | """ 12 | amrfinder_update -d '{params.db_dir}' 2> {log} 13 | # Fix the 'latest' symlink to be relative, so it works from containers too 14 | ln -srfT "$(realpath '{params.db_dir}/latest')" '{params.db_dir}/latest' 15 | """ 16 | 17 | rule run_amrfinderplus: 18 | input: 19 | contigs = get_assembly, 20 | db_dir = os.path.join(config['params']['db_dir'], "amrfinderplus", "latest") 21 | output: 22 | report = "results/{sample}/amrfinderplus/report.tsv", 23 | metadata = "results/{sample}/amrfinderplus/metadata.txt" 24 | message: "Running rule run_amrfinderplus on {wildcards.sample} with contigs" 25 | log: 26 | "logs/amrfinderplus_{sample}.log" 27 | conda: 28 | "../envs/amrfinderplus.yaml" 29 | params: 30 | species = branch(get_species, then=lambda w: get_species(w).replace(' ','_')) 31 | threads: 32 | config["params"]["threads"] 33 | shell: 34 | """ 35 | [ -n '{params.species}' ] && amrfinder --list_organisms -d {input.db_dir} 2>/dev/null | fgrep -q '{params.species}' && SPECIES_OPT='-O {params.species}' || SPECIES_OPT='' 36 | amrfinder -n '{input.contigs}' $SPECIES_OPT -o '{output.report}' -d '{input.db_dir}' >{log} 2>&1 37 | sed -En 's/^Software version: (.*)$/--analysis_software_version \\1/p;s/^Database version: (.*)$/--reference_database_version \\1/p' {log} | sort -u >{output.metadata} 38 | """ 39 | 40 | rule hamronize_amrfinderplus: 41 | input: 42 | contigs = get_assembly, 43 | report = "results/{sample}/amrfinderplus/report.tsv", 44 | metadata = "results/{sample}/amrfinderplus/metadata.txt" 45 | output: 46 | "results/{sample}/amrfinderplus/hamronized_report.tsv" 47 | log: 48 | "logs/amrfinderplus_{sample}_hamronize.log" 49 | conda: 50 | "../envs/hamronization.yaml" 51 | shell: 52 | """ 53 | hamronize amrfinderplus --input_file_name {input.contigs} $(cat {input.metadata}) {input.report} > {output} 2>{log} 54 | """ 55 | -------------------------------------------------------------------------------- /rules/amrplusplus.smk: -------------------------------------------------------------------------------- 1 | rule get_amrplusplus_db: 2 | output: 3 | megares_db = os.path.join(config["params"]["db_dir"], "megares", "megares_full_database_v2.00.fasta"), 4 | megares_annot = os.path.join(config["params"]["db_dir"], "megares", "megares_full_annotations_v2.00.csv") 5 | params: 6 | db_dir = os.path.join(config["params"]["db_dir"], "megares") 7 | conda: 8 | "../envs/amrplusplus.yaml" 9 | shell: 10 | """ 11 | mkdir -p {params.db_dir} 12 
| wget -O {output.megares_db} https://www.meglab.org/downloads/megares_v2.00/megares_full_database_v2.00.fasta 13 | wget -O {output.megares_annot} https://www.meglab.org/downloads/megares_v2.00/megares_full_annotations_v2.00.csv 14 | cd {params.db_dir} 15 | bwa index megares_full_database_v2.00.fasta 16 | """ 17 | 18 | rule get_amrplusplus_binaries: 19 | output: 20 | resistome_tool = os.path.join(config["params"]["binary_dir"], 'resistome'), 21 | rarefaction_tool = os.path.join(config["params"]["binary_dir"], 'rarefaction'), 22 | snp_tool = os.path.join(config["params"]["binary_dir"], 'snpfinder') 23 | params: 24 | bin_dir = config['params']['binary_dir'], 25 | snpfinder_version = config['params']['amrplusplus']["snpfinder_version"], 26 | resistome_analyzer_version = config['params']['amrplusplus']["resistome_analyzer_version"], 27 | rarefaction_analyzer_version = config['params']['amrplusplus']["rarefactionanalyzer_version"] 28 | shell: 29 | """ 30 | rm -rf {output.resistome_tool} {output.rarefaction_tool} {output.snp_tool} 31 | TMP_DIR="$(mktemp -d)" 32 | git clone https://github.com/cdeanj/snpfinder $TMP_DIR/snpfinder 33 | git -C $TMP_DIR/snpfinder checkout {params.snpfinder_version} 34 | make -C $TMP_DIR/snpfinder 35 | mv $TMP_DIR/snpfinder/snpfinder {output.snp_tool} 36 | git clone https://github.com/cdeanj/rarefactionanalyzer $TMP_DIR/rarefaction 37 | git -C $TMP_DIR/rarefaction checkout {params.rarefaction_analyzer_version} 38 | make -C $TMP_DIR/rarefaction 39 | mv $TMP_DIR/rarefaction/rarefaction {output.rarefaction_tool} 40 | git clone https://github.com/cdeanj/resistomeanalyzer $TMP_DIR/resistome 41 | git -C $TMP_DIR/resistome checkout {params.resistome_analyzer_version} 42 | make -C $TMP_DIR/resistome 43 | mv $TMP_DIR/resistome/resistome {output.resistome_tool} 44 | rm -rf "$TMP_DIR" 45 | """ 46 | 47 | rule run_amrplusplus: 48 | input: 49 | read1 = get_read1, 50 | read2 = get_read2, 51 | megares_db = os.path.join(config["params"]["db_dir"], "megares", "megares_full_database_v2.00.fasta"), 52 | megares_annot = os.path.join(config["params"]["db_dir"], "megares", "megares_full_annotations_v2.00.csv"), 53 | resistome_tool = os.path.join(config["params"]["binary_dir"], 'resistome'), 54 | rarefaction_tool = os.path.join(config["params"]["binary_dir"], 'rarefaction'), 55 | snp_tool = os.path.join(config["params"]["binary_dir"], 'snpfinder') 56 | output: 57 | amr_class = "results/{sample}/amrplusplus/class.tsv", 58 | amr_gene = "results/{sample}/amrplusplus/gene.tsv", 59 | amr_snps = "results/{sample}/amrplusplus/snp.tsv", 60 | amr_group = "results/{sample}/amrplusplus/group.tsv", 61 | amr_mech = "results/{sample}/amrplusplus/mech.tsv", 62 | metadata = "results/{sample}/amrplusplus/metadata.txt" 63 | log: 64 | "logs/amrplusplus_{sample}.log" 65 | message: 66 | "Running rule run_amrplusplus on {wildcards.sample} with reads" 67 | conda: 68 | "../envs/amrplusplus.yaml" 69 | threads: 70 | config["params"]["threads"] 71 | params: 72 | output_prefix_tmp = "results/{sample}/amrplusplus/tmp", 73 | resistome_analyzer_version = config['params']['amrplusplus']["resistome_analyzer_version"] 74 | shell: 75 | """ 76 | mkdir -p {params.output_prefix_tmp} 77 | trimmomatic PE {input.read1} {input.read2} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r1_se_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_se_trimmed.fq SLIDINGWINDOW:4:15 LEADING:3 TRAILING:3 MINLEN:36 
>{log} 2>&1 78 | bwa mem {input.megares_db} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq 2>> {log} | samtools sort -n -O sam > {params.output_prefix_tmp}/{wildcards.sample}.sam 2>>{log} 79 | {input.resistome_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene} -group_fp {output.amr_group} -class_fp {output.amr_class} -mech_fp {output.amr_mech} -t 80 >>{log} 2>&1 80 | {input.rarefaction_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene}_rare -group_fp {output.amr_group}_rare -class_fp {output.amr_class}_rare -mech_fp {output.amr_mech}_rare -min 5 -max 100 -skip 5 -samples 1 -t 80 >>{log} 2>&1 81 | {input.snp_tool} -amr_fp {input.megares_db} -sampe {params.output_prefix_tmp}/{wildcards.sample}.sam -out_fp {output.amr_snps} >>{log} 2>&1 82 | #rm -rf {params.output_prefix_tmp} 83 | 84 | echo "--analysis_software_version {params.resistome_analyzer_version}" > {output.metadata} 85 | echo "--reference_database_version v2.00" >> {output.metadata} 86 | """ 87 | 88 | rule hamronize_amrplusplus: 89 | input: 90 | read1 = get_read1, 91 | amr_gene = "results/{sample}/amrplusplus/gene.tsv", 92 | metadata = "results/{sample}/amrplusplus/metadata.txt" 93 | output: 94 | "results/{sample}/amrplusplus/hamronized_report.tsv" 95 | conda: 96 | "../envs/hamronization.yaml" 97 | shell: 98 | """ 99 | hamronize amrplusplus $(paste - - < {input.metadata}) --input_file_name {input.read1} {input.amr_gene} > {output} 100 | """ 101 | -------------------------------------------------------------------------------- /rules/ariba.smk: -------------------------------------------------------------------------------- 1 | rule get_ariba_db: 2 | output: 3 | db = directory(os.path.join(config["params"]["db_dir"], "ariba_card.prepareref")), 4 | dbversion = os.path.join(config["params"]["db_dir"], "ariba_card.version.txt") 5 | conda: 6 | "../envs/ariba.yaml" 7 | log: 8 | "logs/ariba_db.log" 9 | params: 10 | db_dir = config["params"]["db_dir"], 11 | dateformat = config["params"]["dateformat"] 12 | shell: 13 | """ 14 | ariba getref card {params.db_dir}/ariba_card > {log} 15 | ariba prepareref -f {params.db_dir}/ariba_card.fa -m {params.db_dir}/ariba_card.tsv {output.db} >> {log} 16 | date +"{params.dateformat}" > {output.dbversion} 17 | """ 18 | 19 | rule run_ariba: 20 | input: 21 | read1 = get_read1, 22 | read2 = get_read2, 23 | ref_db = os.path.join(config["params"]["db_dir"], "ariba_card.prepareref"), 24 | dbversion = os.path.join(config["params"]["db_dir"], "ariba_card.version.txt") 25 | output: 26 | report = "results/{sample}/ariba/report.tsv", 27 | metadata = "results/{sample}/ariba/metadata.txt" 28 | message: "Running rule run_ariba on {wildcards.sample} with reads" 29 | log: 30 | "logs/ariba_{sample}.log" 31 | conda: 32 | "../envs/ariba.yaml" 33 | threads: 1 34 | params: 35 | output_folder = "results/{sample}/ariba/", 36 | tmp_dir = "results/{sample}/ariba_tmp" 37 | shell: 38 | """ 39 | mkdir -p {params.tmp_dir} 40 | ariba run --noclean --force --tmp_dir {params.tmp_dir} --threads {threads} {input.ref_db} {input.read1} {input.read2} {params.output_folder} > {log} 2>&1 41 | rm -rf {params.tmp_dir} 42 | ariba version | grep "ARIBA version" | perl -p -e 's/ARIBA version: (.+)/--analysis_software_version $1/' > {output.metadata} 43 | cat {input.dbversion} | 
perl -p -e 's/(.+)/--reference_database_version $1/' >> {output.metadata} 44 | """ 45 | 46 | rule hamronize_ariba: 47 | input: 48 | read1 = get_read1, 49 | report = "results/{sample}/ariba/report.tsv", 50 | metadata = "results/{sample}/ariba/metadata.txt" 51 | output: 52 | "results/{sample}/ariba/hamronized_report.tsv" 53 | conda: 54 | "../envs/hamronization.yaml" 55 | shell: 56 | """ 57 | hamronize ariba --input_file_name {input.read1} --reference_database_name CARD $(paste - - < {input.metadata}) {input.report} > {output} 58 | """ 59 | -------------------------------------------------------------------------------- /rules/csstar.smk: -------------------------------------------------------------------------------- 1 | rule get_csstar_script: 2 | output: 3 | csstar = os.path.join(config['params']['binary_dir'], "c-SSTAR", "c-SSTAR") 4 | params: 5 | bin_dir = config['params']['binary_dir'] 6 | shell: 7 | """ 8 | mkdir {params.bin_dir} 9 | cd {params.bin_dir} 10 | git clone https://github.com/chrisgulvik/c-SSTAR 11 | """ 12 | 13 | rule get_csstar_database: 14 | output: 15 | dbfile = os.path.join(config['params']['db_dir'], "ResGANNOT_srst2.fasta"), 16 | dbversion = os.path.join(config["params"]["db_dir"], "ResGANNOT_srst2_version.txt") 17 | params: 18 | db_source = config["params"]["csstar"]["db_source"], 19 | dateformat = config["params"]["dateformat"] 20 | shell: 21 | """ 22 | wget -O {output.dbfile} {params.db_source} 23 | date +"{params.dateformat}" > {output.dbversion} 24 | """ 25 | 26 | rule run_csstar: 27 | input: 28 | contigs = get_assembly, 29 | csstar = os.path.join(config['params']['binary_dir'], "c-SSTAR", "c-SSTAR"), 30 | resgannot_db = os.path.join(config['params']['db_dir'], "ResGANNOT_srst2.fasta"), 31 | dbversion = os.path.join(config["params"]["db_dir"], "ResGANNOT_srst2_version.txt") 32 | output: 33 | report = "results/{sample}/csstar/report.tsv", 34 | metadata = "results/{sample}/csstar/metadata.txt" 35 | message: "Running rule run_csstar on {wildcards.sample} with contigs" 36 | log: 37 | "logs/csstar_{sample}.log" 38 | conda: 39 | "../envs/csstar.yaml" 40 | threads: 41 | config["params"]["threads"] 42 | params: 43 | outdir = 'results/{sample}/csstar', 44 | logfile = "results/{sample}/csstar/c-SSTAR_*.log" 45 | shell: 46 | """ 47 | {input.csstar} -g {input.contigs} -d {input.resgannot_db} --outdir {params.outdir} > {output.report} 2>{log} 48 | grep "c-SSTAR version" {params.logfile} | perl -p -e 's/.+c-SSTAR version: (.+)/--analysis_software_version $1/' > {output.metadata} 49 | cat {input.dbversion} | perl -p -e 's/(.+)/--reference_database_version $1/' >> {output.metadata} 50 | """ 51 | 52 | rule hamronize_csstar: 53 | input: 54 | contigs = get_assembly, 55 | report = "results/{sample}/csstar/report.tsv", 56 | metadata = "results/{sample}/csstar/metadata.txt" 57 | output: 58 | "results/{sample}/csstar/hamronized_report.tsv" 59 | conda: 60 | "../envs/hamronization.yaml" 61 | shell: 62 | """ 63 | hamronize csstar --input_file_name {input.contigs} --reference_database_name ResGANNOT $(paste - - < {input.metadata}) {input.report} > {output} 64 | """ 65 | 66 | -------------------------------------------------------------------------------- /rules/deeparg.smk: -------------------------------------------------------------------------------- 1 | rule get_deeparg_db: 2 | output: 3 | db_dir = directory(os.path.join(config['params']['db_dir'], 'deeparg')) 4 | log: 5 | "logs/deeparg_db.log" 6 | params: 7 | db_zip = os.path.join(config['params']['db_dir'], 'deeparg.zip') 8 | shell: 
9 | """ 10 | # deeparg download_data should do this but thinks it is gzip and fails; 11 | # we use wget -c so an incomplete download will resume (it is 1.8G) 12 | wget -cO '{params.db_zip}' 'https://zenodo.org/records/8280582/files/deeparg.zip?download=1' 13 | unzip -d "$(dirname '{output.db_dir}')" '{params.db_zip}' 14 | rm -f '{params.db_zip}' 15 | """ 16 | 17 | rule run_deeparg_fna: 18 | input: 19 | contigs = get_assembly, 20 | db_dir = os.path.join(config['params']['db_dir'], 'deeparg') 21 | output: 22 | report = "results/{sample}/deeparg-fna/output.mapping.ARG", 23 | report_potential = "results/{sample}/deeparg-fna/output.mapping.potential.ARG", 24 | metadata = "results/{sample}/deeparg-fna/metadata.txt" 25 | message: "Running deeparg on {wildcards.sample} with contigs" 26 | log: 27 | "logs/deeparg-fna_{sample}.log" 28 | conda: 29 | "../envs/deeparg.yaml" 30 | params: 31 | out_dir = "results/{sample}/deeparg-fna", 32 | version = "1.0.4" 33 | shell: 34 | """ 35 | mkdir -p '{params.out_dir}' 36 | # Note: default --arg-alignment-identity is 50, maybe increase to 90? 37 | deeparg predict --model LS --type nucl -i '{input.contigs}' -d '{input.db_dir}' -o '{params.out_dir}/output' >{log} 2>&1 38 | echo "--input_file_name {input.contigs} --analysis_software_version {params.version} --reference_database_version {params.version}" >{output.metadata} 39 | """ 40 | 41 | rule run_deeparg_fqs: 42 | input: 43 | read1 = get_read1, read2 = get_read2, 44 | db_dir = os.path.join(config['params']['db_dir'], 'deeparg') 45 | output: 46 | report = "results/{sample}/deeparg-fqs/output.mapping.ARG", 47 | report_potential = "results/{sample}/deeparg-fqs/output.mapping.potential.ARG", 48 | metadata = "results/{sample}/deeparg-fqs/metadata.txt" 49 | message: "Running deeparg on {wildcards.sample} with reads" 50 | log: 51 | "logs/deeparg-fqs_{sample}.log" 52 | conda: 53 | "../envs/deeparg.yaml" 54 | params: 55 | out_dir = "results/{sample}/deeparg-fqs", 56 | version = "1.0.4" 57 | shell: 58 | """ 59 | mkdir -p '{params.out_dir}/tmp' 60 | # Create symlinks to the reads in the output/tmp directory, because deeparg leaves behind huge 61 | # temporary files both in the (possibly read-only) input directory and in the output directory. 
62 | ln -srft '{params.out_dir}/tmp' '{input.read1}' '{input.read2}' 63 | deeparg short_reads_pipeline -d '{input.db_dir}' \ 64 | --forward_pe_file "{params.out_dir}/tmp/$(basename '{input.read1}')" \ 65 | --reverse_pe_file "{params.out_dir}/tmp/$(basename '{input.read2}')" \ 66 | --output_file '{params.out_dir}/tmp/output' >{log} 2>&1 67 | # Move the final outputs out of the tmp directory and rename to what they should be 68 | mv -f '{params.out_dir}/tmp/output.clean.deeparg.mapping.ARG' '{output.report}' 69 | mv -f '{params.out_dir}/tmp/output.clean.deeparg.mapping.potential.ARG' '{output.report_potential}' 70 | rm -rf '{params.out_dir}/tmp' 71 | # Write the metadata file for hamronizer 72 | echo "--input_file_name {input.read1} --analysis_software_version {params.version} --reference_database_version {params.version}" >{output.metadata} 73 | """ 74 | 75 | rule hamronize_deeparg: 76 | input: 77 | report = "results/{sample}/deeparg-{sfx}/output.mapping.ARG", 78 | metadata = "results/{sample}/deeparg-{sfx}/metadata.txt" 79 | output: 80 | "results/{sample}/deeparg-{sfx}/hamronized_report.tsv" 81 | log: 82 | "logs/deeparg-{sfx}_{sample}_hamronize.log" 83 | conda: 84 | "../envs/hamronization.yaml" 85 | shell: 86 | "hamronize deeparg $(cat '{input.metadata}') '{input.report}' >{output}" 87 | -------------------------------------------------------------------------------- /rules/groot.smk: -------------------------------------------------------------------------------- 1 | rule get_groot_db: 2 | output: 3 | db = directory(os.path.join(config["params"]["db_dir"], "groot_index")) 4 | conda: 5 | "../envs/groot.yaml" 6 | params: 7 | db_source = config['params']['groot']['db_source'], 8 | read_length = config['params']['groot']['read_length'], 9 | db_dir = config["params"]["db_dir"] 10 | log: 11 | "logs/groot_db.log" 12 | threads: 13 | config["params"]["threads"] 14 | shell: 15 | """ 16 | rm -rf {params.db_dir}/groot_clustered {params.db_dir}/groot_index 17 | # the mv and tmp alternatives work around 'groot get' in container insisting 18 | # that it cannot unpack in the output directory (while it did unpack fine in /tmp) 19 | groot get -d {params.db_source} -o {params.db_dir}/groot_clustered >{log} 2>&1 || true 20 | test -d {params.db_dir}/groot_clustered || mv /tmp/{params.db_source}.90 {params.db_dir}/groot_clustered/ || tar -C {params.db_dir}/groot_clustered -xf tmp.tar && rm -f tmp.tar 21 | groot index -p {threads} -m {params.db_dir}/groot_clustered/{params.db_source}.90 -i {output.db} -w {params.read_length} --log {log} 22 | # fix permissions on the directories created by groot (why 0700?), sure to fail in shared envs and non-root containers 23 | chmod 0755 {params.db_dir}/groot_clustered {params.db_dir}/groot_index 24 | """ 25 | 26 | rule run_groot: 27 | input: 28 | read1 = get_read1, 29 | read2 = get_read2, 30 | db_index = os.path.join(config["params"]["db_dir"], "groot_index") 31 | output: 32 | report = "results/{sample}/groot/report.tsv", 33 | metadata = "results/{sample}/groot/metadata.txt" 34 | message: "Running rule run_groot on {wildcards.sample} with reads" 35 | log: 36 | "logs/groot_{sample}.log" 37 | conda: 38 | "../envs/groot.yaml" 39 | threads: 40 | config["params"]["threads"] 41 | params: 42 | min_read_length = config['params']['groot']['read_length'] - config['params']['groot']['window'], 43 | max_read_length = config['params']['groot']['read_length'] + config['params']['groot']['window'], 44 | graph_dir = "results/{sample}/groot/graphs" 45 | shell: 46 | """ 47 | zcat 
{input.read1} {input.read2} | seqkit seq --min-len {params.min_read_length} --max-len {params.max_read_length} | groot align -g {params.graph_dir} -p {threads} -i {input.db_index} --log {log} | groot report --log {log} > {output.report} 48 | groot version | perl -p -e 's/(.+)/--analysis_software_version $1/' > {output.metadata} 49 | """ 50 | 51 | rule hamronize_groot: 52 | input: 53 | read1 = get_read1, 54 | report = "results/{sample}/groot/report.tsv", 55 | metadata = "results/{sample}/groot/metadata.txt" 56 | output: 57 | "results/{sample}/groot/hamronized_report.tsv" 58 | conda: 59 | "../envs/hamronization.yaml" 60 | params: 61 | db_source = config['params']['groot']['db_source'], 62 | db_dir = config["params"]["db_dir"] 63 | shell: 64 | """ 65 | hamronize groot --input_file_name {input.read1} $(paste - < {input.metadata}) --reference_database_name {params.db_source} --reference_database_version $(paste - < {params.db_dir}/groot_clustered/card.90/timestamp.txt) {input.report} > {output} 66 | """ 67 | -------------------------------------------------------------------------------- /rules/kmerresistance.smk: -------------------------------------------------------------------------------- 1 | rule get_kmerresistance_db: 2 | output: 3 | # We do not mark species_db as a directory output because Snakemake would drop it on failure and it is a 20G download 4 | resfinder_db = directory(os.path.join(config['params']['db_dir'], 'kmerresistance', 'resfinder_db')) 5 | params: 6 | db_base = os.path.join(config['params']['db_dir'], 'kmerresistance'), 7 | res_db_version = config['params']['kmerresistance']['res_db_version'], 8 | species_db = os.path.join(config['params']['db_dir'], 'kmerresistance', 'kmerfinder_db') 9 | log: 10 | "logs/kmerresistance_db.log" 11 | conda: 12 | "../envs/kmerresistance.yaml" 13 | shell: 14 | """ 15 | mkdir -p {params.db_base} 16 | # Species database is downloaded like this but is 20G and downloads 17 | # from the DTU FTP very slowly, so not going to support this feature 18 | # for now and just use a single type klebsiella genome for now 19 | #git clone --depth=1 https://bitbucket.org/genomicepidemiology/kmerfinder_db.git {params.species_db} 20 | #{params.species_db}/INSTALL.sh {params.species_db} bacteria latest 21 | mkdir -p {params.species_db} 22 | test -f '{params.species_db}/bacteria.name' || 23 | wget -O- https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/240/185/GCF_000240185.1_ASM24018v2/GCF_000240185.1_ASM24018v2_genomic.fna.gz | 24 | gunzip -c - | tee '{params.species_db}/bacteria.fsa' | kma_index -Sparse ATG -i -- -o '{params.species_db}/bacteria' 25 | # Resistance database same as for resfinder 26 | git clone --depth=1 -b {params.res_db_version} https://bitbucket.org/genomicepidemiology/resfinder_db.git {output.resfinder_db} 27 | grep -Ev '^[[:space:]]*(#|$)' {output.resfinder_db}/config | cut -f1 | xargs -I@ cat {output.resfinder_db}/@.fsa | kma_index -i -- -o {output.resfinder_db}/kma_resfinder 28 | """ 29 | 30 | rule run_kmerresistance: 31 | input: 32 | read1 = get_read1, 33 | read2 = get_read2, 34 | resfinder_db = os.path.join(config['params']['db_dir'], 'kmerresistance', 'resfinder_db') 35 | output: 36 | report = "results/{sample}/kmerresistance/results.res", 37 | metadata = "results/{sample}/kmerresistance/metadata.txt" 38 | message: "Running rule run_kmerresistance on {wildcards.sample} with reads" 39 | log: 40 | "logs/kmerresistance_{sample}.log" 41 | conda: 42 | "../envs/kmerresistance.yaml" 43 | threads: 44 | config['params']['threads'] 45 | params: 46 | 
output_folder = "results/{sample}/kmerresistance", 47 | kma_resfinder_db = os.path.join(config['params']['db_dir'], 'kmerresistance', 'resfinder_db', 'kma_resfinder'), 48 | species_db = os.path.join(config['params']['db_dir'], 'kmerresistance', 'kmerfinder_db', 'bacteria'), 49 | db_version = config['params']['kmerresistance']['res_db_version'] 50 | shell: 51 | """ 52 | zcat {input.read1} {input.read2} > {params.output_folder}/temp_all_reads.fq 53 | kmerresistance -i {params.output_folder}/temp_all_reads.fq -t_db {params.kma_resfinder_db} -s_db {params.species_db} -o {params.output_folder}/results > {log} 2>&1 54 | rm {params.output_folder}/temp_all_reads.fq 55 | kmerresistance -v 2>&1 | perl -p -e 's/KmerResistance-(.+)/--analysis_software_version $1/' > {output.metadata} 56 | echo "{params.db_version}" | perl -p -e 's/(.+)/--reference_database_version $1/' >> {output.metadata} 57 | """ 58 | 59 | rule hamronize_kmerresistance: 60 | input: 61 | read1 = get_read1, 62 | report = "results/{sample}/kmerresistance/results.res", 63 | metadata = "results/{sample}/kmerresistance/metadata.txt" 64 | output: 65 | "results/{sample}/kmerresistance/hamronized_report.tsv" 66 | conda: 67 | "../envs/hamronization.yaml" 68 | shell: 69 | """ 70 | hamronize kmerresistance --input_file_name {input.read1} $(paste - - < {input.metadata}) {input.report} > {output} 71 | """ 72 | 73 | -------------------------------------------------------------------------------- /rules/mykrobe.smk: -------------------------------------------------------------------------------- 1 | rule run_mykrobe: 2 | input: 3 | read1 = get_read1, 4 | read2 = get_read2 5 | output: 6 | report = "results/{sample}/mykrobe/report.json" 7 | message: "Running rule run_mykrobe on {wildcards.sample} with reads" 8 | log: 9 | "logs/mykrobe_{sample}.log" 10 | conda: 11 | "../envs/mykrobe.yaml" 12 | threads: 13 | config["params"]["threads"] 14 | params: 15 | species = get_species, 16 | skel_dir = "results/{sample}/mykrobe/skels", 17 | tmp_dir = "results/{sample}/mykrobe/tmp" 18 | shell: 19 | """ 20 | mkdir -p $(dirname {output.report}) 21 | echo '{{}}' >{output.report} # create empty JSON report by default to flag that Mykrobe found nothing 22 | if [ -z '{params.species}' ]; then 23 | echo "Not running Mykrobe: it requires the species of the organism" 24 | else 25 | # map species to mykrobe-supported species code (see output of: mykrobe panels describe) 26 | declare -rA species_map=( 27 | 'Mycobacterium tuberculosis' tb 28 | 'Staphylococcus aureus' staph 29 | 'Shigella sonnei' sonnei 30 | ) # ignore Mykrobe's typhi and paratyphiB because we don't have that detail 31 | myk_species=${{species_map[{params.species}]:-}} 32 | if [ -z "$myk_species" ]; then 33 | echo "Not running Mykrobe: it doesn't support {params.species}" 34 | else 35 | rm -f {output.report} # remove as it is now up to mykrobe to write it or else fail 36 | mykrobe predict -s {wildcards.sample} -S $myk_species -1 {input.read1} {input.read2} --skeleton_dir {params.skel_dir} --threads {threads} --format json --output {output.report} --tmp {params.tmp_dir}/ 37 | rm -rf {params.skel_dir} {params.tmp_dir} 38 | fi 39 | fi >{log} 2>&1 40 | """ 41 | 42 | rule hamronize_mykrobe: 43 | input: 44 | report = "results/{sample}/mykrobe/report.json", 45 | output: 46 | "results/{sample}/mykrobe/hamronized_report.tsv" 47 | log: 48 | "logs/mykrobe_{sample}_hamronize.log" 49 | conda: 50 | "../envs/hamronization.yaml" 51 | shell: 52 | "hamronize mykrobe {input.report} >{output} 2>{log}" 53 | 
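54 | # Note: the key/value-pair form of 'declare -A' used for species_map above requires bash >= 5.1;
55 | # on an older bash the same map would need explicit subscripts, e.g.
56 | #   declare -rA species_map=( ['Mycobacterium tuberculosis']=tb ['Staphylococcus aureus']=staph ['Shigella sonnei']=sonnei )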
-------------------------------------------------------------------------------- /rules/resfams.smk: -------------------------------------------------------------------------------- 1 | rule get_resfams_db: 2 | output: 3 | resfams_hmms = os.path.join(config["params"]["db_dir"], "resfams-full.hmm"), 4 | dbversion = os.path.join(config["params"]["db_dir"], "resfams.version.txt") 5 | params: 6 | dateformat = config["params"]["dateformat"] 7 | shell: 8 | """ 9 | wget -O- http://dantaslab.wustl.edu/resfams/Resfams-full.hmm.gz | gunzip -c > {output.resfams_hmms} 10 | date +"{params.dateformat}" > {output.dbversion} 11 | """ 12 | 13 | rule run_resfams: 14 | input: 15 | contigs = get_assembly, 16 | resfams_hmms = os.path.join(config["params"]["db_dir"], "resfams-full.hmm"), 17 | dbversion = os.path.join(config["params"]["db_dir"], "resfams.version.txt") 18 | output: 19 | report = "results/{sample}/resfams/resfams.tblout", 20 | metadata = "results/{sample}/resfams/metadata.txt" 21 | message: "Running rule run_resfams on {wildcards.sample} with contigs" 22 | log: 23 | "logs/resfams_{sample}.log" 24 | conda: 25 | "../envs/resfams.yaml" 26 | threads: 27 | config["params"]["threads"] 28 | params: 29 | output_prefix = "results/{sample}/resfams" 30 | shell: 31 | """ 32 | prodigal -p meta -i {input.contigs} -a {params.output_prefix}/protein_seqs.faa > {log} 2>&1 33 | hmmsearch --cpu {threads} --tblout {output.report} {input.resfams_hmms} {params.output_prefix}/protein_seqs.faa >>{log} 2>&1 34 | hmmsearch -h | grep "# HMMER " | perl -p -e 's/# HMMER (.+) \\(.+/--analysis_software_version hmmsearch_v$1/' >> {output.metadata} 35 | cat {input.dbversion} | perl -p -e 's/(.+)/--reference_database_version $1/' >> {output.metadata} 36 | """ 37 | 38 | rule hamronize_resfams: 39 | input: 40 | contigs = get_assembly, 41 | report = "results/{sample}/resfams/resfams.tblout", 42 | metadata = "results/{sample}/resfams/metadata.txt" 43 | output: 44 | "results/{sample}/resfams/hamronized_report.tsv" 45 | conda: 46 | "../envs/hamronization.yaml" 47 | shell: 48 | """ 49 | hamronize resfams --input_file_name {input.contigs} $(paste - - < {input.metadata}) {input.report} > {output} 50 | """ 51 | -------------------------------------------------------------------------------- /rules/resfinder.smk: -------------------------------------------------------------------------------- 1 | rule get_resfinder_db: 2 | output: 3 | res_db = directory(os.path.join(config['params']['db_dir'], "resfinder_db")), 4 | point_db = directory(os.path.join(config['params']['db_dir'], "pointfinder_db")), 5 | disinf_db = directory(os.path.join(config['params']['db_dir'], "disinfinder_db")) 6 | log: 7 | "logs/resfinder_db.log" 8 | conda: 9 | "../envs/resfinder.yaml" 10 | params: 11 | res_ver = config['params']['resfinder']['res_db_version'], 12 | point_ver = config['params']['resfinder']['point_db_version'], 13 | disinf_ver = config['params']['resfinder']['disinf_db_version'] 14 | shell: 15 | """ 16 | {{ set -euo pipefail 17 | git clone --depth=1 -b {params.res_ver} https://bitbucket.org/genomicepidemiology/resfinder_db.git {output.res_db} 18 | git clone --depth=1 -b {params.point_ver} https://bitbucket.org/genomicepidemiology/pointfinder_db.git {output.point_db} 19 | git clone --depth=1 -b {params.disinf_ver} https://bitbucket.org/genomicepidemiology/disinfinder_db.git {output.disinf_db} 20 | grep -Ev '^[[:space:]]*(#|$)' {output.res_db}/config | cut -f1 | xargs -I@ kma_index -i {output.res_db}/@.fsa -o {output.res_db}/@ 21 | grep -Ev 
'^[[:space:]]*(#|$)' {output.point_db}/config | cut -f1 | xargs -I@ sh -c 'kma_index -i {output.point_db}/@/*.fsa -o {output.point_db}/@/@' 22 | grep -Ev '^[[:space:]]*(#|$)' {output.disinf_db}/config | cut -f1 | xargs -I@ kma_index -i {output.disinf_db}/@.fsa -o {output.disinf_db}/@ 23 | }} >{log} 2>&1 24 | """ 25 | 26 | rule run_resfinder_fna: 27 | input: 28 | assembly = get_assembly, 29 | res_db = os.path.join(config['params']['db_dir'], "resfinder_db"), 30 | point_db = os.path.join(config['params']['db_dir'], "pointfinder_db"), 31 | disinf_db = os.path.join(config['params']['db_dir'], "disinfinder_db") 32 | output: 33 | dir = directory("results/{sample}/resfinder-fna"), 34 | report = "results/{sample}/resfinder-fna/data_resfinder.json" 35 | message: "Running rule run_resfinder_fna on {wildcards.sample} assembly" 36 | log: 37 | "logs/resfinder-fna_{sample}.log" 38 | conda: 39 | "../envs/resfinder.yaml" 40 | threads: 41 | config['params']['threads'] 42 | params: 43 | species = branch(get_species, then=get_species, otherwise="Unknown"), 44 | shell: 45 | """ 46 | mkdir -p {output.dir} 47 | run_resfinder.py --acquired --point --disinfectant --species '{params.species}' --ignore_missing_species \ 48 | -db_res '{input.res_db}' -db_point '{input.point_db}' -db_disinf '{input.disinf_db}' \ 49 | -ifa '{input.assembly}' -j {output.report} -o {output.dir} >{log} 2>&1 50 | """ 51 | 52 | rule run_resfinder_fqs: 53 | input: 54 | read1 = get_read1, read2 = get_read2, 55 | res_db = os.path.join(config['params']['db_dir'], "resfinder_db"), 56 | point_db = os.path.join(config['params']['db_dir'], "pointfinder_db"), 57 | disinf_db = os.path.join(config['params']['db_dir'], "disinfinder_db") 58 | output: 59 | dir = directory("results/{sample}/resfinder-fqs"), 60 | report = "results/{sample}/resfinder-fqs/data_resfinder.json" 61 | message: "Running rule run_resfinder_fqs on {wildcards.sample} reads" 62 | log: 63 | "logs/resfinder-fqs_{sample}.log" 64 | conda: 65 | "../envs/resfinder.yaml" 66 | threads: 67 | config['params']['threads'] 68 | params: 69 | species = branch(get_species, then=get_species, otherwise="Unknown"), 70 | shell: 71 | """ 72 | mkdir -p {output.dir} 73 | run_resfinder.py --acquired --point --disinfectant --species '{params.species}' --ignore_missing_species \ 74 | -db_res '{input.res_db}' -db_point '{input.point_db}' -db_disinf '{input.disinf_db}' \ 75 | -ifq '{input.read1}' '{input.read2}' -j {output.report} -o {output.dir} >{log} 2>&1 76 | """ 77 | 78 | rule hamronize_resfinder: 79 | input: 80 | report = "results/{sample}/resfinder-{sfx}/data_resfinder.json", 81 | output: 82 | "results/{sample}/resfinder-{sfx}/hamronized_report.tsv" 83 | log: 84 | "logs/resfinder-{sfx}_{sample}_hamronize.log" 85 | conda: 86 | "../envs/hamronization.yaml" 87 | shell: 88 | "hamronize resfinder {input.report} >{output} 2>{log}" 89 | -------------------------------------------------------------------------------- /rules/rgi.smk: -------------------------------------------------------------------------------- 1 | rule get_rgi_db: 2 | output: 3 | card_db = os.path.join(config["params"]["db_dir"], "card", "card.json") 4 | params: 5 | db_dir = os.path.join(config["params"]["db_dir"], "card") 6 | log: 7 | "logs/rgi_db.log" 8 | shell: 9 | """{{ 10 | mkdir -p {params.db_dir} 11 | wget -c -q -O {params.db_dir}/card.tar.bz2 'https://card.mcmaster.ca/latest/data' 12 | tar -C {params.db_dir} -xf {params.db_dir}/card.tar.bz2 13 | rm -f {params.db_dir}/card.tar.bz2 14 | }} >{log} 2>&1 15 | """ 16 | 17 | rule run_rgi: 18 
| input: 19 | contigs = get_assembly, 20 | card_db = os.path.join(config["params"]["db_dir"], "card", "card.json") 21 | output: 22 | report = "results/{sample}/rgi/rgi.txt", 23 | metadata = "results/{sample}/rgi/metadata.txt" 24 | message: "Running rule run_rgi on {wildcards.sample} with contigs" 25 | log: 26 | "logs/rgi_{sample}.log" 27 | conda: 28 | "../envs/rgi.yaml" 29 | threads: 30 | config["params"]["threads"] 31 | params: 32 | out_dir = "results/{sample}/rgi" 33 | shell: 34 | """{{ 35 | # Inconveniently we need to cd to the output directory because 'rgi load' writes 36 | # its database where it runs, and we don't want two jobs writing in one location. 37 | # Before we change directory we need to make all file paths absolute. 38 | FNA="$(realpath '{input.contigs}')" 39 | CARD="$(realpath '{input.card_db}')" 40 | META="$(realpath '{output.metadata}')" 41 | mkdir -p {params.out_dir} 42 | cd {params.out_dir} 43 | rgi load -i "$CARD" --local 44 | rgi main --local --clean --input_sequence "$FNA" --output_file rgi --num_threads {threads} 45 | # We extract the database version from the JSON, as 'rgi database -v' gives "N/A" 46 | echo "--analysis_software_version $(rgi main --version) --reference_database_version $(jq -r '._version' "$CARD")" >"$META" 47 | }} >{log} 2>&1 48 | """ 49 | 50 | rule hamronize_rgi: 51 | input: 52 | contigs = get_assembly, 53 | report = "results/{sample}/rgi/rgi.txt", 54 | metadata = "results/{sample}/rgi/metadata.txt" 55 | output: 56 | "results/{sample}/rgi/hamronized_report.tsv" 57 | conda: 58 | "../envs/hamronization.yaml" 59 | shell: 60 | """ 61 | hamronize rgi $(cat {input.metadata}) --input_file_name {input.contigs} {input.report} > {output} 62 | """ 63 | -------------------------------------------------------------------------------- /rules/rgi_bwt.smk: -------------------------------------------------------------------------------- 1 | rule get_rgi_bwt_db: 2 | output: 3 | card_db_bwt = os.path.join(config["params"]["db_dir"], "card_bwt", "card.json") 4 | params: 5 | db_dir = os.path.join(config["params"]["db_dir"], "card_bwt") 6 | log: 7 | "logs/rgi_bwt_db.log" 8 | shell: 9 | """{{ 10 | mkdir -p {params.db_dir} 11 | wget -c -q -O {params.db_dir}/card.tar.bz2 'https://card.mcmaster.ca/latest/data' 12 | tar -C {params.db_dir} -xf {params.db_dir}/card.tar.bz2 13 | rm -f {params.db_dir}/card.tar.bz2 14 | }} >{log} 2>&1 15 | """ 16 | 17 | rule run_rgi_bwt: 18 | input: 19 | read1 = get_read1, 20 | read2 = get_read2, 21 | card_db = os.path.join(config["params"]["db_dir"], "card_bwt", "card.json") 22 | output: 23 | report = "results/{sample}/rgibwt/rgibwt.gene_mapping_data.txt", 24 | metadata = "results/{sample}/rgibwt/metadata.txt" 25 | message: "Running rule run_rgi_bwt on {wildcards.sample} with reads" 26 | log: 27 | "logs/rgi_bwt_{sample}.log" 28 | conda: 29 | "../envs/rgi.yaml" 30 | threads: 31 | config["params"]["threads"] 32 | params: 33 | out_dir = "results/{sample}/rgibwt" 34 | shell: 35 | """{{ 36 | # We need to change directory to the output directory because we can't 37 | # control where rgi writes its annotations or "loads" its database; 38 | # and so before this we need to make all paths we use relative to PWD 39 | FQ1="$(realpath '{input.read1}')" 40 | FQ2="$(realpath '{input.read2}')" 41 | CARD="$(realpath '{input.card_db}')" 42 | META="$(realpath '{output.metadata}')" 43 | mkdir -p {params.out_dir} 44 | cd {params.out_dir} 45 | 46 | # Figure out the database version as 'rgi database -v' gives "NA" 47 | DB_VER="$(jq -r '._version' "$CARD")" 48 | 49 
| # Create the annotation files (will be written in PWD) 50 | rgi card_annotation --input "$CARD" 51 | F1="card_database_v${{DB_VER}}.fasta" 52 | F2="card_database_v${{DB_VER}}_all.fasta" 53 | 54 | # Now "load" (= create) the database locally and run the tool 55 | rgi load --local -i "$CARD" --card_annotation "$F1" --card_annotation_all_models "$F2" 56 | rm -f "$F1" "$F2" 57 | rgi bwt --local --clean --read_one "$FQ1" --read_two "$FQ2" --output_file "rgibwt" --threads {threads} 58 | 59 | echo "--analysis_software_version $(rgi main --version) --reference_database_version $DB_VER" >"$META" 60 | }} >{log} 2>&1 61 | """ 62 | 63 | rule hamronize_rgi_bwt: 64 | input: 65 | read1 = get_read1, 66 | report = "results/{sample}/rgibwt/rgibwt.gene_mapping_data.txt", 67 | metadata = "results/{sample}/rgibwt/metadata.txt" 68 | output: 69 | "results/{sample}/rgibwt/hamronized_report.tsv" 70 | conda: 71 | "../envs/hamronization.yaml" 72 | shell: 73 | """ 74 | hamronize rgi $(cat {input.metadata}) --input_file_name {input.read1} {input.report} > {output} 75 | """ 76 | -------------------------------------------------------------------------------- /rules/srax.smk: -------------------------------------------------------------------------------- 1 | rule run_srax: 2 | input: 3 | contigs = get_assembly 4 | output: 5 | report = "results/{sample}/srax/sraX_detected_ARGs.tsv", 6 | metadata = "results/{sample}/srax/metadata.txt" 7 | message: "Running rule run_srax on {wildcards.sample} with contigs" 8 | log: 9 | "logs/srax_{sample}.log" 10 | conda: 11 | "../envs/srax.yaml" 12 | threads: 13 | config["params"]["threads"] 14 | params: 15 | dbtype = config["params"]["srax"]["dbtype"], 16 | dateformat = config["params"]["dateformat"] 17 | shell: 18 | """{{ 19 | mkdir -p $(dirname {output.report}) 20 | # copy input to a temp directory because sraX processes every fasta file in its input directory 21 | TMPDIR=$(mktemp -d) 22 | cp {input.contigs} $TMPDIR/ 23 | sraX -i $TMPDIR -t 4 -db {params.dbtype} -o $TMPDIR/output 24 | mv $TMPDIR/output/Results/Summary_files/sraX_detected_ARGs.tsv {output.report} 25 | rm -rf $TMPDIR 26 | }} >{log} 2>&1 27 | printf -- '--analysis_software_version %s --reference_database_version %s --reference_database_name srax_{params.dbtype}_amr_db' \ 28 | $(sraX --version | fgrep version | cut -d: -f2) $(date '+{params.dateformat}') >{output.metadata} 29 | """ 30 | 31 | rule hamronize_srax: 32 | input: 33 | contigs = get_assembly, 34 | report = "results/{sample}/srax/sraX_detected_ARGs.tsv", 35 | metadata = "results/{sample}/srax/metadata.txt" 36 | output: 37 | "results/{sample}/srax/hamronized_report.tsv" 38 | conda: 39 | "../envs/hamronization.yaml" 40 | shell: 41 | """ 42 | hamronize srax --input_file_name {input.contigs} $(cat {input.metadata}) {input.report} > {output} 43 | """ 44 | -------------------------------------------------------------------------------- /rules/srst2.smk: -------------------------------------------------------------------------------- 1 | rule get_srst2_db: 2 | output: 3 | db_file = os.path.join(config["params"]["db_dir"], config["params"]["srst2"]["gene_db"]), 4 | dbversion = os.path.join(config["params"]["db_dir"], config["params"]["srst2"]["gene_db"] + '-version.txt') 5 | log: 6 | "logs/srst2_db.log" 7 | params: 8 | db_source = config["params"]["srst2"]["db_source"], 9 | dateformat = config["params"]["dateformat"] 10 | shell: 11 | """ 12 | curl {params.db_source} --output {output.db_file} 13 | date +"{params.dateformat}" > {output.dbversion} 14 | """ 15 | 16 | rule 
run_srst2: 17 | input: 18 | read1 = get_read1, 19 | read2 = get_read2, 20 | db_file = os.path.join(config["params"]["db_dir"], config["params"]["srst2"]["gene_db"]), 21 | dbversion = os.path.join(config["params"]["db_dir"], config["params"]["srst2"]["gene_db"] + '-version.txt') 22 | output: 23 | report = "results/{sample}/srst2/srst2__fullgenes__ARGannot__results.txt", 24 | metadata = "results/{sample}/srst2/metadata.txt" 25 | message: "Running rule run_srst2 on {wildcards.sample} with reads" 26 | log: 27 | "logs/srst2_{sample}.log" 28 | conda: 29 | "../envs/srst2.yaml" 30 | threads: 31 | config["params"]["threads"] 32 | params: 33 | gene_db = os.path.join(config["params"]["db_dir"], config["params"]["srst2"]["gene_db"]), 34 | min_depth = config["params"]["srst2"]["min_depth"], 35 | max_divergence = config["params"]["srst2"]["max_divergence"], 36 | for_suffix = config["params"]["srst2"]["forward"], 37 | rev_suffix = config["params"]["srst2"]["reverse"], 38 | output_prefix = "results/{sample}/srst2/srst2", 39 | shell: 40 | """ 41 | srst2 --threads {threads} --gene_db {params.gene_db} --forward {params.for_suffix} --reverse {params.rev_suffix} --input_pe {input.read1} {input.read2} --min_depth {params.min_depth} --output {params.output_prefix} > {log} 2>&1 42 | srst2 --version 2>&1 | perl -p -e 's/srst2 (.+)/--analysis_software_version $1/' > {output.metadata} 43 | cat {input.dbversion} | perl -p -e 's/(.+)/--reference_database_version $1/' >> {output.metadata} 44 | """ 45 | 46 | rule hamronize_srst2: 47 | input: 48 | read1 = get_read1, 49 | report = "results/{sample}/srst2/srst2__fullgenes__ARGannot__results.txt", 50 | metadata = "results/{sample}/srst2/metadata.txt" 51 | output: 52 | "results/{sample}/srst2/hamronized_report.tsv" 53 | conda: 54 | "../envs/hamronization.yaml" 55 | shell: 56 | """ 57 | hamronize srst2 --input_file_name {input.read1} $(paste - - - < {input.metadata}) {input.report} > {output} 58 | """ 59 | -------------------------------------------------------------------------------- /rules/staramr.smk: -------------------------------------------------------------------------------- 1 | rule run_staramr: 2 | input: 3 | contigs = get_assembly 4 | output: 5 | report = "results/{sample}/staramr/resfinder.tsv", 6 | metadata = "results/{sample}/staramr/metadata.txt" 7 | message: "Running rule run_staramr on {wildcards.sample} with contigs" 8 | log: 9 | "logs/staramr_{sample}.log" 10 | conda: 11 | "../envs/staramr.yaml" 12 | threads: 13 | config["params"]["threads"] 14 | params: 15 | output_folder = "results/{sample}/staramr/", 16 | settings = "results/{sample}/staramr/settings.txt" 17 | shell: 18 | """ 19 | rm -r {params.output_folder}; 20 | staramr search -o {params.output_folder} --nproc {threads} {input.contigs} >{log} 2>&1 21 | staramr --version | perl -p -e 's/staramr (.+)/--analysis_software_version $1/' > {output.metadata} 22 | grep "resfinder_db_commit" {params.settings} | perl -p -e 's/.+= (.+)/--reference_database_version $1/' >> {output.metadata} 23 | """ 24 | # only supports salmonella/campylobacter 25 | 26 | rule hamronize_staramr: 27 | input: 28 | report = "results/{sample}/staramr/resfinder.tsv", 29 | metadata = "results/{sample}/staramr/metadata.txt" 30 | output: 31 | "results/{sample}/staramr/hamronized_report.tsv" 32 | conda: 33 | "../envs/hamronization.yaml" 34 | shell: 35 | """ 36 | hamronize staramr $(paste - - < {input.metadata}) {input.report} > {output} 37 | """ 38 | 39 | -------------------------------------------------------------------------------- 
/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Reproducible bash 4 | export LC_ALL="C" 5 | set -euo pipefail 6 | 7 | # Execute in directory of this script 8 | cd "$(dirname "$0")" 9 | 10 | # Change this if you prefer a different Conda environment name 11 | ENV_NAME=hamronization_workflow 12 | 13 | # Activate the Conda environment 14 | CONDA_BASE=$(conda info --base) 15 | source $CONDA_BASE/etc/profile.d/conda.sh 16 | conda activate $ENV_NAME 17 | 18 | # And run the pipeline 19 | exec snakemake --configfile test/test_config.yaml --use-conda --jobs 1 20 | -------------------------------------------------------------------------------- /test/get_large_test_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # stop on errors 3 | set -o errexit 4 | 5 | wget -O SAMN02599008.tar.gz https://osf.io/6ma8p/download 6 | tar xvf SAMN02599008.tar.gz 7 | 8 | wget -O SAMEA6634591.tar.gz https://osf.io/4tqxc/download 9 | tar xvf SAMEA6634591.tar.gz 10 | -------------------------------------------------------------------------------- /test/simple/test_R1.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pha4ge/hAMRonization_workflow/dcb2d1a3965075095e6bb62e8bed3e8d9e9c17c2/test/simple/test_R1.fq.gz -------------------------------------------------------------------------------- /test/simple/test_R2.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pha4ge/hAMRonization_workflow/dcb2d1a3965075095e6bb62e8bed3e8d9e9c17c2/test/simple/test_R2.fq.gz -------------------------------------------------------------------------------- /test/simple/test_contig.fna: -------------------------------------------------------------------------------- 1 | >ndm-1 2 | ATGGAATTGCCCAATATTATGCACCCGGTCGCGAAGCTGAGCACCGCATTAGCCGCTGCATTGATGCTGAGCGGGTGCATGCCCGGTGAA 3 | ATCCGCCCGACGATTGGCCAGCAAATGGAAACTGGCGACCAACGGTTTGGCGATCTGGTTTTCCGCCAGCTCGCACCGAATGTCTGGCAG 4 | CACACTTCCTATCTCGACATGCCGGGTTTCGGGGCAGTCGCTTCCAACGGTTTGATCGTCAGGGATGGCGGCCGCGTGCTGGTGGTCGAT 5 | ACCGCCTGGACCGATGACCAGACCGCCCAGATCCTCAACTGGATCAAGCAGGAGATCAACCTGCCGGTCGCGCTGGCGGTGGTGACTCAC 6 | GCGCATCAGGACAAGATGGGCGGTATGGACGCGCTGCATGCGGCGGGGATTGCGACTTATGCCAATGCGTTGTCGAACCAGCTTGCCCCG 7 | CAAGAGGGGATGGTTGCGGCGCAACACAGCCTGACTTTCGCCGCCAATGGCTGGGTCGAACCAGCAACCGCGCCCAACTTTGGCCCGCTC 8 | AAGGTATTTTACCCCGGCCCCGGCCACACCAGTGACAATATCACCGTTGGGATCGACGGCACCGACATCGCTTTTGGTGGCTGCCTGATC 9 | AAGGACAGCAAGGCCAAGTCGCTCGGCAATCTCGGTGATGCCGACACTGAGCACTACGCCGCGTCAGCGCGCGCGTTTGGTGCGGCGTTC 10 | CCCAAGGCCAGCATGATCGTGATGAGCCATTCCGCCCCCGATAGCCGCGCCGCAATCACTCATACGGCCCGCATGGCCGACAAGCTGCGC 11 | TGA 12 | -------------------------------------------------------------------------------- /test/simple/test_contig.fna.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pha4ge/hAMRonization_workflow/dcb2d1a3965075095e6bb62e8bed3e8d9e9c17c2/test/simple/test_contig.fna.log -------------------------------------------------------------------------------- /test/test_config.yaml: -------------------------------------------------------------------------------- 1 | # Path or URL to the sample sheet 2 | samples: "test/test_data.tsv" 3 | 4 | params: 5 | db_dir: "data/dbs" # directory to install databases in 6 | dateformat: "%Y-%b-%d" # date formats for database downloads 7 | binary_dir: "bindeps" 
# directory to install non-conda dependencies 8 | threads: 1 #number of threads per rule 9 | abricate: 10 | name: "ncbi" 11 | minid: 75 #minimum identity threshold 12 | mincov: 0 #minimum coverage threshold 13 | amrfinderplus: 14 | #empty, no options exposed 15 | groot: 16 | db_source: "card" #which preclustered db to use 17 | read_length: 250 # read length to use for indexing the preclustered db 18 | window: 20 # window around read_length for allowed read lengths (min-len = read_length - window, max-len = read_length + window) 19 | rgi: 20 | #empty, no options exposed 21 | srax: 22 | dbtype: "basic" 23 | amrplusplus: 24 | resistome_analyzer_version: c6b097ad054e0620560f3bcd22e2a63b896ab33a 25 | snpfinder_version: 28a20e981d33f8d22814ed6f0aeba36f101a8037 26 | rarefactionanalyzer_version: de06630986c10c03b8540fd53925075baca5d38e 27 | ariba: 28 | #empty, no options exposed 29 | staramr: 30 | #empty, no options exposed 31 | resfams: 32 | #empty, no options exposed 33 | mykrobe: 34 | #empty, no options exposed 35 | resfinder: 36 | # git tags for databases used; each component has its own at https://bitbucket.org/genomicepidemiology 37 | res_db_version: "resfinder-4.6.0" 38 | point_db_version: "resfinder-4.6.0" 39 | disinf_db_version: "resfinder-4.6.0" 40 | kmerresistance: 41 | # uses the same res_db as resfinder but also a KmerFinder database for species detection (TODO: check whether needed) 42 | # due to the large size and slow FTP for the KmerFinder db we just use a single dummy Klebsiella type genome 43 | res_db_version: "resfinder-4.6.0" 44 | csstar: 45 | db_source: "https://raw.githubusercontent.com/tomdeman-bio/Sequence-Search-Tool-for-Antimicrobial-Resistance-SSTAR-/master/Latest_AR_database/ResGANNOT_srst2.fasta" 46 | srst2: 47 | gene_db: "ARGannot.fasta" 48 | db_source: "https://raw.githubusercontent.com/katholt/srst2/master/data/ARGannot.fasta" 49 | min_depth: 5 50 | max_divergence: 10 51 | forward: "_R1" 52 | reverse: "_R2" 53 | -------------------------------------------------------------------------------- /test/test_data.tsv: -------------------------------------------------------------------------------- 1 | # This TSV file specifies the list of inputs for a hAMRonization workflow run. 2 | # Its first line (excluding comment lines and empty lines) must have column headings, 3 | # and must define at least the columns listed below. Columns may be in any order. 4 | # 5 | # Every row must have a (possibly empty) value in every column. The 'biosample' 6 | # column must have a unique non-empty value in every row. If you omit assembly, 7 | # reads, or species, then tools that require these inputs will fail to run. 8 | 9 | species biosample assembly read1 read2 10 | NDM NDM_biosample test/simple/test_contig.fna test/simple/test_R1.fq.gz test/simple/test_R2.fq.gz 11 | --------------------------------------------------------------------------------