├── README.md
├── Snakefile
├── cluster.slurm.cheaha.json
└── snakemakeslurm


/README.md:
--------------------------------------------------------------------------------
1 | # snakemake-slurm-uab
2 | Working example of the Snakemake tutorial using the UAB Cheaha computing cluster via the SLURM scheduler
3 |
4 | Use with Snakemake tutorial at http://snakemake.readthedocs.io/en/stable/tutorial/tutorial.html
5 |
6 | In this setup, one runs "snakemakeslurm" instead of "snakemake" and it pulls in the cluster config file and creates the sbatch command line mapping. Surely there is a more elegant way to do this!
7 |
8 | Basic flow
9 |
10 |     source ~/snakemake-miniconda3/bin/activate snakemake-tutorial
11 |     snakemakeslurm
12 |
13 |


--------------------------------------------------------------------------------
/Snakefile:
--------------------------------------------------------------------------------
1 | # ensure errors propagate along piped shell commands
2 | shell.prefix("set -o pipefail; ")
3 |
4 | rule all:
5 |     input:
6 |         "report.html"
7 |
8 | rule bwa_map:
9 |     input:
10 |         "data/genome.fa",
11 |         "data/samples/{sample}.fastq"
12 |     output:
13 |         "mapped_reads/{sample}.bam"
14 |     threads: 4
15 |     shell:
16 |         "bwa mem -t {threads} {input} | samtools view -Sb - > {output}"
17 |
18 | rule samtools_sort:
19 |     input:
20 |         "mapped_reads/{sample}.bam"
21 |     output:
22 |         "sorted_reads/{sample}.bam"
23 |     threads: 4
24 |     shell:
25 |         "samtools sort -@ {threads} -T sorted_reads/{wildcards.sample} "
26 |         "-O bam {input} > {output}"
27 |
28 | rule samtools_index:
29 |     input:
30 |         "sorted_reads/{sample}.bam"
31 |     output:
32 |         "sorted_reads/{sample}.bam.bai"
33 |     shell:
34 |         "samtools index {input}"
35 |
36 | SAMPLES = ["A", "B"]
37 | #expand("sorted_reads/{sample}.bam", sample=SAMPLES)
38 | #expand("sorted_reads/{sample}.{replicate}.bam", sample=SAMPLES, replicate=[0, 1])
39 |
40 | rule bcftools_call:
41 |     input:
42 |         fa="data/genome.fa",
43 |         bam=expand("sorted_reads/{sample}.bam", sample=SAMPLES),
44 |         bai=expand("sorted_reads/{sample}.bam.bai", sample=SAMPLES)
45 |     output:
46 |         "calls/all.vcf"
47 |     shell:
48 |         "samtools mpileup -g -f {input.fa} {input.bam} | "
49 |         "bcftools call -mv - > {output}"
50 |
51 | rule clean:
52 |     """Clean up all output for a fresh test run"""
53 |     shell:
54 |         "rm -rf report.html mapped_reads sorted_reads calls"
55 |
56 | rule report:
57 |     input:
58 |         "calls/all.vcf"
59 |     output:
60 |         "report.html"
61 |     run:
62 |         from snakemake.utils import report
63 |         with open(input[0]) as vcf:
64 |             n_calls = sum(1 for l in vcf if not l.startswith("#"))
65 |
66 |         report("""
67 |         An example variant calling workflow
68 |         ===================================
69 |
70 |         Reads were mapped to the Yeast
71 |         reference genome and variants were called jointly with
72 |         SAMtools/BCFtools.
73 |
74 |         This resulted in {n_calls} variants (see Table T1_).
75 |         """, output[0], T1=input[0])
76 |


--------------------------------------------------------------------------------
/cluster.slurm.cheaha.json:
--------------------------------------------------------------------------------
1 | {
2 |     "documentation": {
3 |         "cmdline": "Use with snakemake --cluster-config cluster.slurm.cheaha.json --cluster 'sbatch --job-name {cluster.job-name} --ntasks {cluster.ntasks} --cpus-per-task {threads} --mem-per-cpu {cluster.mem-per-cpu-mb} --partition {cluster.partition} --time {cluster.time} --mail-user {cluster.mail-user} --mail-type {cluster.mail-type} --error {cluster.error} --output {cluster.output}'"
4 |     },
5 |     "__default__" : {
6 |         "job-name" : "SM.{rule}",
7 |         "partition" : "express",
8 |         "time" : "02:00:00",
9 |         "ntasks" : 1,
10 |         "cpus-per-task" : 1,
11 |         "mem-per-cpu-mb" : 2000,
12 |         "output" : "logs/%j.%N.out.txt",
13 |         "error" : "logs/%j.%N.err.txt",
14 |         "mail-user" : "$USER@uab.edu",
15 |         "mail-type" : "ALL"
16 |     },
17 |
18 |     "bwa_map" : {
19 |         "mem-per-cpu-mb" : 2200
20 |     },
21 |     "short" : {
22 |         "time" : "12:00:00",
23 |         "partition" : "short"
24 |     },
25 |     "medium" : {
26 |         "time" : "2-02:00:00",
27 |         "partition" : "medium"
28 |     },
29 |     "long" : {
30 |         "time" : "6-06:00:00",
31 |         "partition" : "long"
32 |     }
33 | }
34 |


--------------------------------------------------------------------------------
/snakemakeslurm:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # snakemakeslurm - wrapper that runs snakemake with each job submitted through sbatch.
4 | #
5 | # Usage:
6 | #   snakemakeslurm clean              delete every file listed by `snakemake --summary`
7 | #   snakemakeslurm [snakemake args]   run the workflow; all arguments are passed to snakemake
8 | #
9 | # The cluster config (cluster.slurm.cheaha.json) is looked up next to this script.
10 |
11 | #
12 | # check for clean
13 | #
14 | # https://snakemake.readthedocs.io/en/stable/project_info/faq.html#how-do-i-remove-all-files-created-by-snakemake-i-e-like-make-clean
15 | if [ "$1" == "clean" ]; then
16 |     echo 'rm $(snakemake --summary | tail -n+2 | cut -f1)'
17 |     # Query the summary once so the list printed is exactly the list removed.
18 |     # tail -n+2 drops the first (header) line; cut -f1 keeps the first tab-separated column.
19 |     SM_FILES=$(snakemake --summary | tail -n+2 | cut -f1)
20 |     if [ -n "$SM_FILES" ]; then
21 |         printf '%s\n' "$SM_FILES"
22 |         # Remove one path per line so names containing spaces are not split,
23 |         # and use -- so a name starting with '-' is not taken as an option.
24 |         while IFS= read -r SM_FILE; do
25 |             [ -n "$SM_FILE" ] && rm -f -- "$SM_FILE"
26 |         done <<< "$SM_FILES"
27 |     fi
28 |     exit 0
29 | fi
30 |
31 | #
32 | # launch snakemake to run jobs on UAB CHEAHA via SLURM
33 | #
34 | # Each name in SM_PARAMS becomes "--<name> {cluster.<name>}" on the sbatch command line;
35 | # snakemake fills the {cluster.*} placeholders from the cluster config file.
36 | SM_PARAMS="job-name ntasks partition time mail-user mail-type error output"
37 | SM_ARGS="--cpus-per-task {threads} --mem-per-cpu {cluster.mem-per-cpu-mb}"
38 | for P in ${SM_PARAMS}; do SM_ARGS="$SM_ARGS --$P {cluster.$P}"; done
39 | echo "SM_ARGS: ${SM_ARGS}"
40 |
41 | # our SLURM error/output paths expect a logs/ subdir in PWD
42 | mkdir -p logs
43 |
44 | # "$@" forwards the caller's arguments unchanged (no re-splitting or globbing);
45 | # the config path is quoted so an install directory containing spaces still works.
46 | snakemake \
47 |     "$@" \
48 |     --latency-wait 30 \
49 |     -j 999 \
50 |     --cluster-config "$(dirname "$0")/cluster.slurm.cheaha.json" \
51 |     --cluster "sbatch $SM_ARGS"
52 |
--------------------------------------------------------------------------------