├── README.md
├── Snakefile
├── cluster.slurm.cheaha.json
└── snakemakeslurm


/README.md:
--------------------------------------------------------------------------------
 1 | # snakemake-slurm-uab
 2 | A working example of the Snakemake tutorial running on the UAB Cheaha computing cluster via the SLURM scheduler.
 3 | 
 4 | Use with Snakemake tutorial at http://snakemake.readthedocs.io/en/stable/tutorial/tutorial.html
 5 | 
 6 | In this setup, you run "snakemakeslurm" instead of "snakemake"; the wrapper passes the cluster config file to snakemake and builds the sbatch command line from the config keys. Surely there is a more elegant way to do this!
 7 | 
 8 | Basic flow
 9 | 
10 | 	source ~/snakemake-miniconda3/bin/activate snakemake-tutorial
11 | 	snakemakeslurm 
12 | 
13 | 


--------------------------------------------------------------------------------
/Snakefile:
--------------------------------------------------------------------------------
 1 | # ensure errors propagate along piped shell commands
 2 | shell.prefix("set -o pipefail; ")
 3 | 
 4 | rule all:
 5 |     input:
 6 |         "report.html"
 7 | 
 8 | rule bwa_map:
 9 |     input:
10 |         "data/genome.fa",
11 |         "data/samples/{sample}.fastq"
12 |     output:
13 |         "mapped_reads/{sample}.bam"
14 |     threads: 4
15 |     shell:
16 |         "bwa mem -t {threads} {input} | samtools view -Sb - > {output}"
17 | 
18 | rule samtools_sort:
19 |     input:
20 |         "mapped_reads/{sample}.bam"
21 |     output:
22 |         "sorted_reads/{sample}.bam"
23 |     threads: 4
24 |     shell:
25 |         "samtools sort -@ {threads} -T sorted_reads/{wildcards.sample} "
26 |         "-O bam {input} > {output}"
27 | 
28 | rule samtools_index:
29 |     input:
30 |         "sorted_reads/{sample}.bam"
31 |     output:
32 |         "sorted_reads/{sample}.bam.bai"
33 |     shell:
34 |         "samtools index {input}"
35 | 
36 | SAMPLES = ["A", "B"]
37 | #expand("sorted_reads/{sample}.bam", sample=SAMPLES)
38 | #expand("sorted_reads/{sample}.{replicate}.bam", sample=SAMPLES, replicate=[0, 1])
39 | 
40 | rule bcftools_call:
41 |     input:
42 |         fa="data/genome.fa",
43 |         bam=expand("sorted_reads/{sample}.bam", sample=SAMPLES),
44 |         bai=expand("sorted_reads/{sample}.bam.bai", sample=SAMPLES)
45 |     output:
46 |         "calls/all.vcf"
47 |     shell:
48 |         "samtools mpileup -g -f {input.fa} {input.bam} | "
49 |         "bcftools call -mv - > {output}"
50 | 
51 | rule clean:
52 |      """Clean up all output for a fresh test run"""
53 |      shell:
54 |           "rm -rf report.html mapped_reads sorted_reads calls"
55 | 
56 | rule report:
57 |     input:
58 |         "calls/all.vcf"
59 |     output:
60 |         "report.html"
61 |     run:
62 |         from snakemake.utils import report
63 |         with open(input[0]) as vcf:
64 |             n_calls = sum(1 for l in vcf if not l.startswith("#"))
65 | 
66 |         report("""
67 |         An example variant calling workflow
68 |         ===================================
69 | 
70 |         Reads were mapped to the Yeast
71 |         reference genome and variants were called jointly with
72 |         SAMtools/BCFtools.
73 | 
74 |         This resulted in {n_calls} variants (see Table T1_).
75 |         """, output[0], T1=input[0])
76 | 
77 | 


--------------------------------------------------------------------------------
/cluster.slurm.cheaha.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "documentation": {
 3 |         "cmdline": "Use with snakemake --cluster-config cluster.slurm.cheaha.json --cluster 'sbatch --job-name {cluster.job-name} --ntasks {cluster.ntasks} --cpus-per-task {threads} --mem-per-cpu {cluster.mem-per-cpu-mb} --partition {cluster.partition} --time {cluster.time} --mail-user {cluster.mail-user} --mail-type {cluster.mail-type} --error {cluster.error} --output {cluster.output}'"
 4 |     },
 5 |     "__default__" : {
 6 |         "job-name"       : "SM.{rule}",
 7 |         "partition"      : "express",
 8 |         "time"           : "02:00:00",
 9 |         "ntasks"         : 1,
10 |         "cpus-per-task"  : 1,
11 |         "mem-per-cpu-mb" : 2000,
12 |         "output"         : "logs/%j.%N.out.txt",
13 |         "error"          : "logs/%j.%N.err.txt",
14 |         "mail-user"      : "$USER@uab.edu",
15 |         "mail-type"      : "ALL"
16 |     },
17 | 
18 |     "bwa_map" : {
19 | 	"mem-per-cpu-mb" : 2200
20 |     },
21 |     "short" : {
22 |         "time"      : "12:00:00",
23 |         "partition" : "short"
24 |     },
25 |     "medium" : {
26 |         "time"      : "2-02:00:00",
27 |         "partition" : "medium"
28 |     },
29 |     "long" : {
30 |         "time"      : "6-06:00:00",
31 |         "partition" : "long"
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/snakemakeslurm:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #
 4 | # "clean" subcommand: print, then rm -f, column 1 of `snakemake --summary` (first line skipped), then exit 0
 5 | #
 6 | # https://snakemake.readthedocs.io/en/stable/project_info/faq.html#how-do-i-remove-all-files-created-by-snakemake-i-e-like-make-clean
 7 | if [ "$1" == "clean" ]; then
 8 |     echo 'rm $(snakemake --summary | tail -n+2 | cut -f1)'
 9 |     snakemake --summary | tail -n+2 | cut -f1
10 |     rm -f $(snakemake --summary | tail -n+2 | cut -f1)
11 |     exit 0
12 | fi
13 | 
14 | # launch snakemake to run jobs on UAB CHEAHA via SLURM
15 | # SM_ARGS is the sbatch option string: one "--KEY {cluster.KEY}" pair is appended per name in SM_PARAMS;
16 | # the {threads} and {cluster.*} placeholders stay literal in this script and are echoed for inspection
17 | SM_PARAMS="job-name ntasks partition time mail-user mail-type error output"
18 | SM_ARGS="--cpus-per-task {threads} --mem-per-cpu {cluster.mem-per-cpu-mb}"
19 | for P in ${SM_PARAMS}; do SM_ARGS="$SM_ARGS --$P {cluster.$P}"; done
20 | echo "SM_ARGS: ${SM_ARGS}"
21 | 
22 | # our SLURM error/output paths expect a logs/ subdir in PWD
23 | mkdir -p logs
24 | # "$@" forwards each caller argument intact; the script dir is quoted so paths with spaces work
25 | snakemake \
26 |     "$@" \
27 |      --latency-wait 30 \
28 |     -j 999 \
29 |     --cluster-config "$(dirname "$0")/cluster.slurm.cheaha.json" \
30 |     --cluster "sbatch $SM_ARGS"
31 | 


--------------------------------------------------------------------------------