├── LICENSE ├── README.md ├── config ├── bpipe.config.groovy ├── preambles.groovy ├── tools.groovy └── validate_module_params.groovy ├── modules ├── ChIPseq │ ├── GREAT.groovy │ ├── GREAT.header │ ├── blacklist_filter.groovy │ ├── blacklist_filter.header │ ├── bowtie1.groovy │ ├── bowtie1.header │ ├── bowtie2.groovy │ ├── bowtie2.header │ ├── diffbind3.groovy │ ├── diffbind3.header │ ├── filbowtie2unique.groovy │ ├── filbowtie2unique.header │ ├── ipstrength.groovy │ ├── ipstrength.header │ ├── macs2.groovy │ ├── macs2.header │ ├── make_greylist.groovy │ ├── make_greylist.header │ ├── normbigwig.groovy │ ├── normbigwig.header │ ├── pbc.groovy │ ├── pbc.header │ ├── peak_annotation.groovy │ ├── peak_annotation.header │ ├── phantompeak.groovy │ ├── phantompeak.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── upsetPlot.groovy │ └── upsetPlot.header ├── DNAseq │ ├── bwa.groovy │ ├── bwa.header │ ├── collectVariantCallingMetrics.groovy │ ├── collectVariantCallingMetrics.header │ ├── gatherBQSRReports.groovy │ ├── gatherBQSRReports.header │ ├── genomicsDBImport.groovy │ ├── genomicsDBImport.header │ ├── genotypeGVCFs.groovy │ ├── genotypeGVCFs.header │ ├── realignment.groovy │ ├── realignment.header │ ├── recalibration.groovy │ ├── recalibration.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── snpEff.groovy │ ├── snpEff.header │ ├── validateVariants.groovy │ ├── validateVariants.header │ ├── variantFiltration.groovy │ ├── variantFiltration.header │ ├── variant_score_recalibration.groovy │ ├── variant_score_recalibration.header │ ├── variantcallHC.groovy │ ├── variantcallHC.header │ ├── variantcallUG.groovy │ ├── variantcallUG.header │ ├── varianteval.groovy │ ├── varianteval.header │ ├── variantfuseHC.groovy │ └── variantfuseHC.header ├── NGS │ ├── README.md │ ├── bam2bw.groovy │ ├── bam2bw.header │ ├── bamcoverage.groovy │ ├── bamcoverage.header │ ├── bamindexer.groovy │ ├── bamindexer.header │ ├── bamqc.groovy │ ├── bamqc.header │ ├── cutadapt.groovy │ ├── cutadapt.header │ ├── downsamplebam.groovy │ ├── downsamplebam.header │ ├── downsamplefastqPE.groovy │ ├── downsamplefastqPE.header │ ├── downsamplefastqSE.groovy │ ├── downsamplefastqSE.header │ ├── extend.groovy │ ├── extend.header │ ├── fastqc.groovy │ ├── fastqc.header │ ├── fastqscreen.groovy │ ├── fastqscreen.header │ ├── filterchromosomes.groovy │ ├── filterchromosomes.header │ ├── insertsize.groovy │ ├── insertsize.header │ ├── markdups.groovy │ ├── markdups.header │ ├── markdups2.groovy │ ├── markdups2.header │ ├── mergebam.groovy │ ├── mergebam.header │ ├── multiqc.groovy │ ├── multiqc.header │ ├── rmdups.groovy │ ├── rmdups.header │ ├── samtoolscov.groovy │ ├── samtoolscov.header │ ├── strandSpecificBW.groovy │ ├── strandSpecificBW.header │ ├── trackhub.groovy │ ├── trackhub.header │ ├── trackhub_config.groovy │ └── trackhub_config.header ├── RNAseq │ ├── GO_Enrichment.groovy │ ├── GO_Enrichment.header │ ├── deseq2.groovy │ ├── deseq2.header │ ├── deseq2_mm.groovy │ ├── deseq2_mm.header │ ├── dupradar.groovy │ ├── dupradar.header │ ├── filter2htseq.groovy │ ├── filter2htseq.header │ ├── genebodycov.groovy │ ├── genebodycov.header │ ├── genebodycov2.groovy │ ├── genebodycov2.header │ ├── inferexperiment.groovy │ ├── inferexperiment.header │ ├── prermats.groovy │ ├── prermats.header │ ├── qualimap.groovy │ ├── qualimap.header │ ├── rmats.groovy │ ├── rmats.header │ ├── rnaseqc.groovy │ ├── rnaseqc.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── star.groovy │ ├── star.header │ ├── 
starfusion.groovy │ ├── starfusion.header │ ├── stringtie.groovy │ ├── stringtie.header │ ├── subread.groovy │ ├── subread.header │ ├── subread2rnatypes.groovy │ ├── subread2rnatypes.header │ ├── tpm.groovy │ └── tpm.header ├── RNAseqVariantCalling │ ├── add_read_group.groovy │ ├── add_read_group.header │ ├── base_recalibration.groovy │ ├── base_recalibration.header │ ├── create_star_index_sjdb.groovy │ ├── create_star_index_sjdb.header │ ├── mark_dups.groovy │ ├── mark_dups.header │ ├── merge_SJ_tab.groovy │ ├── merge_SJ_tab.header │ ├── splitNcigar.groovy │ ├── splitNcigar.header │ ├── star1pass.groovy │ ├── star1pass.header │ ├── star2pass.groovy │ ├── star2pass.header │ ├── variantCall_HC.groovy │ ├── variantCall_HC.header │ ├── variant_filtration.groovy │ └── variant_filtration.header ├── breaktag │ ├── bwa.groovy │ ├── bwa.header │ ├── collect_stats.groovy │ ├── collect_stats.header │ ├── count_breaks.groovy │ ├── count_breaks.header │ ├── count_breaks_strandless.groovy │ ├── count_breaks_strandless.header │ ├── pattern_filtering.groovy │ └── pattern_filtering.header ├── miscellaneous │ ├── collect_tool_versions.groovy │ └── collect_tool_versions.header ├── scRNAseq │ ├── CRmotifCounts.groovy │ ├── CRmotifCounts.header │ ├── CTannoMarker.groovy │ ├── CTannoMarker.header │ ├── CTannoSeurat.groovy │ ├── CTannoSeurat.header │ ├── DNAaccess.groovy │ ├── DNAaccess.header │ ├── SCTransform.groovy │ ├── SCTransform.header │ ├── addumibarcodetofastq.groovy │ ├── addumibarcodetofastq.header │ ├── assignSouporcellCluster.groovy │ ├── assignSouporcellCluster.header │ ├── cellranger_aggr.groovy │ ├── cellranger_aggr.header │ ├── cellranger_count.groovy │ ├── cellranger_count.header │ ├── cellrangerarc_aggr.groovy │ ├── cellrangerarc_aggr.header │ ├── cellrangerarc_count.groovy │ ├── cellrangerarc_count.header │ ├── cellrangeratac_aggr.groovy │ ├── cellrangeratac_aggr.header │ ├── cellrangeratac_count.groovy │ ├── cellrangeratac_count.header │ ├── demux_gt.groovy │ ├── demux_gt.header │ ├── demux_hto.groovy │ ├── demux_hto.header │ ├── diffExprSeurat.groovy │ ├── diffExprSeurat.header │ ├── diffPeaks.groovy │ ├── diffPeaks.header │ ├── grn.groovy │ ├── grn.header │ ├── motifActivity.groovy │ ├── motifActivity.header │ ├── motifEnrich.groovy │ ├── motifEnrich.header │ ├── motifFootprinting.groovy │ ├── motifFootprinting.header │ ├── peaks2genes.groovy │ ├── peaks2genes.header │ ├── sc_filter.groovy │ ├── sc_filter.header │ ├── sc_integrateATAC.groovy │ ├── sc_integrateATAC.header │ ├── sc_integrateRNA.groovy │ ├── sc_integrateRNA.header │ ├── sc_qc.groovy │ ├── sc_qc.header │ ├── sc_readAggrData.groovy │ ├── sc_readAggrData.header │ ├── sc_readIndivSamplesAndMerge.groovy │ ├── sc_readIndivSamplesAndMerge.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── splitpipe_all.groovy │ ├── splitpipe_all.header │ ├── splitpipe_comb.groovy │ ├── splitpipe_comb.header │ ├── subread.groovy │ ├── subread.header │ ├── umicount.groovy │ ├── umicount.header │ ├── umidedup.groovy │ ├── umidedup.header │ ├── wnn.groovy │ └── wnn.header └── smallRNAseq │ ├── bowtie1.groovy │ ├── bowtie1.header │ ├── dedup.groovy │ ├── dedup.header │ ├── deseq2.groovy │ ├── deseq2.header │ ├── deseq2_mirnamature.groovy │ ├── deseq2_mirnamature.header │ ├── fastq_quality_filter.groovy │ ├── fastq_quality_filter.header │ ├── filter2htseq.groovy │ ├── filter2htseq.header │ ├── filter_smallrna_counts.groovy │ ├── filter_smallrna_counts.header │ ├── mirDeep2.groovy │ ├── mirDeep2.header │ ├── mirDeep2_mapper.groovy │ ├── 
mirDeep2_mapper.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── subread.groovy │ ├── subread.header │ ├── subread2rnatypes.header │ ├── subread_mirnamature.groovy │ ├── subread_mirnamature.header │ ├── trim_umis.groovy │ └── trim_umis.header ├── pipelines ├── ChIPseq │ ├── README.md │ ├── chipseq.pipeline.groovy │ ├── contrasts_diffbind.txt │ ├── essential.vars.groovy │ ├── targets.txt │ └── tools.groovy ├── DNAseq │ ├── README.md │ ├── dnaseq.pipeline.groovy │ ├── essential.vars.groovy │ ├── targets.txt │ └── tools.groovy ├── RNAseq │ ├── README.md │ ├── contrasts.txt │ ├── essential.vars.groovy │ ├── rnaseq.pipeline.groovy │ ├── targets.txt │ └── tools.groovy ├── RNAseqVariantCalling │ ├── README.md │ ├── essential.vars.groovy │ ├── rnaseq_variant_calling.pipeline.groovy │ └── tools.groovy ├── breaktag │ ├── README.md │ ├── breaktag.pipeline.groovy │ ├── essential.vars.groovy │ ├── targets.txt │ └── tools.groovy ├── scRNAseq │ ├── README.md │ ├── combiBarcoding.pipeline.groovy │ ├── contrasts.txt │ ├── essential.vars.groovy │ ├── marsseq.pipeline.groovy │ ├── smartseq.pipeline.groovy │ ├── targets.txt │ ├── tenX.pipeline.groovy │ ├── tenXatac.pipeline.groovy │ ├── tenXmultiome.pipeline.groovy │ └── tools.groovy ├── smallRNAseq │ ├── README.md │ ├── contrasts.txt │ ├── smallrnaseq.essential.vars.groovy │ ├── smallrnaseq.pipeline.groovy │ ├── targets.txt │ └── tools.groovy └── test │ ├── README.md │ └── test.pipeline.groovy ├── resources ├── IMB_logo.png ├── MARS-Seq_protocol_Step-by-Step_MML.pdf ├── NGSpipe2go_scheme.old.png └── NGSpipe2go_scheme.png ├── testdata ├── ChIPseq │ ├── README.Rmd │ ├── contrasts_diffbind.txt │ ├── essential.vars.groovy │ └── targets.txt ├── DNASeq │ ├── README.md │ └── essential.vars.groovy ├── RNAseq │ ├── README.Rmd │ ├── contrasts.txt │ ├── essential.vars.groovy │ └── targets.txt ├── RNAseqVariantCalling │ ├── README.md │ ├── essential.vars.groovy │ ├── knowVariants.vcf │ └── knowVariants.vcf.idx ├── scRNAseq_smartseq2 │ ├── README.md │ ├── essential.vars.groovy │ └── targets.txt ├── smallRNAseq_BCF_miRNA │ ├── README.md │ └── miRNA.essential.vars.groovy ├── smallRNAseq_BCF_smallrnaseq │ ├── README.md │ └── smallrnaseq.essential.vars.groovy ├── tenx_ATAC │ ├── README.md │ └── essential.vars.groovy ├── tenx_GEX │ ├── README.md │ └── essential.vars.groovy └── tenx_multiome │ ├── README.md │ └── essential.vars.groovy └── tools ├── BlackList_Filter ├── BlackList_Filter.R └── make_greylist.R ├── CTanno ├── CTannoMarker.R ├── CTannoSeurat.R └── renv.lock ├── DE_DESeq2 ├── DE_DESeq2.R └── DE_DESeq2_miRNAmature.R ├── DE_edgeR ├── DE_edgeR.R └── DE_edgeR.mmatrix.R ├── ENCODEqc ├── IPstrength.R ├── PBC.R └── phantompeak.R ├── GO_Enrichment ├── GO_Enrichment.R └── GREAT.R ├── Peak_Annotation └── Peak_Annotation.R ├── TPMs └── TPMs.R ├── breaktag ├── pattern_filtering.pl └── umi_filtering.py ├── collectBpipeLogs └── collectBpipeLogs.sh ├── dedup ├── PCRDuplicatesPlot.R └── remove_duplicates_with_stats.sh ├── demux └── demux_hto.R ├── diffbind └── diffbind3.R ├── dupRadar └── dupRadar.R ├── geneBodyCov └── geneBodyCov.R ├── mapping_stats └── mapping_stats_bowtie1.R ├── maser ├── createMaserPlots.R ├── plotTranscriptsMod.R └── volcanoMod.R ├── piRNA ├── piRNABaseTerminalBases.py ├── piRNABaseTerminalBasesPlot.R ├── piRNA_quantification_summarization.R ├── ping-pong_signature.py └── plotPP.R ├── reports ├── shiny_chipseq_reporting_tool │ ├── ChIP.shinyrep.helpers.R │ ├── ChIPreport.Rmd │ └── styles.css ├── shiny_dnaseq_reporting_tool │ ├── styles.css │ 
├── variant.shinyrep.helpers.R │ └── variantreport.Rmd ├── shiny_rnaseq_reporting_tool │ ├── DE.shinyrep.helpers.R │ ├── DEreport.Rmd │ └── styles.css ├── shiny_scrnaseq_reporting_tool │ ├── Read_individual_10X_sample_data.Rmd │ ├── app.R │ ├── sc.report.Rmd │ ├── sc.shinyrep.helpers.R │ ├── scatac.report.Rmd │ ├── scmultiome.report.Rmd │ ├── server.R │ ├── styles.css │ └── ui.R └── shiny_smallrnaseq_reporting_tool │ ├── README │ ├── smallRNA.shinyrep.helpers.R │ ├── smallRNAreport.Rmd │ ├── smallRNAreport.miRNAmature.Rmd │ ├── smallRNAreport.type.Rmd │ └── styles.css ├── sc_DNAaccess ├── DNAaccess.R ├── diffPeaks.R └── renv.lock ├── sc_diffExprSeurat ├── diffExprSeurat.R └── renv.lock ├── sc_grn ├── grn.R └── renv.lock ├── sc_integrate ├── sc_integrateATAC.R └── sc_integrateRNA.R ├── sc_motifs ├── CRmotifCounts.R ├── motifActivity.R ├── motifEnrich.R ├── motifFootprinting.R └── renv.lock ├── sc_norm ├── SCTransform.R └── renv.lock ├── sc_peaks2genes ├── peaks2genes.R └── renv.lock ├── sc_qc ├── renv.lock ├── sc_filter_multiome.R └── sc_qc_multiome.R ├── sc_readData ├── renv.lock ├── sc_readAggrData.R └── sc_readIndivSamplesAndMerge.R ├── sc_wnn ├── renv.lock └── wnn.R ├── smallRNA ├── DEG_analysis_DESeq2.Rmd ├── DEG_analysis_DESeq2.helpers.R ├── DEG_analysis_DESeq2.transposons.Rmd ├── DEG_analysis_Transposons_DESeq2.Rmd ├── DEG_analysis_edgeR.R ├── SummarizeLibrariesRPMs.R ├── SummarizeknownTargetsRPMs.R ├── countNonStructuralReads.R ├── countNonStructuralReadsHTseq-count.R ├── filterSmallRNAclasses.py ├── plotReadLength.R ├── plot_sensor_coverage.R ├── sequence_bias_plot.R ├── smallRNA_classes_plot.R └── summarizeNucleotideByReadLenght.py ├── smallRNA_BCF └── extract_smallRNA.R ├── tSNE_exprs └── app.R ├── trackhub ├── Configure_Trackhub.R └── Make_Trackhub.R └── upsetPlot └── upsetPlot.R /config/validate_module_params.groovy: -------------------------------------------------------------------------------- 1 | class Ngspipe2goWrongTypeException extends Exception { 2 | Ngspipe2goWrongTypeException(String message) { 3 | super(message) 4 | } 5 | } 6 | 7 | Boolean validate_schema(Class Params, Map params) { 8 | try { 9 | // validate parameter types against the schema 10 | p = Params.newInstance(params) 11 | params.each{ k, v -> 12 | if(p[k].getClass() != params[k].getClass()) { 13 | String message = "param ${k} is ${params[k].getClass()} instead of ${p[k].getClass()}" 14 | throw new Ngspipe2goWrongTypeException(message) 15 | } 16 | } 17 | // validate presence of mandatory parameters 18 | assert true == !!p 19 | } catch(Ngspipe2goWrongTypeException e) { 20 | throw new RuntimeException("invalid parameter types\n${e}") 21 | } catch(AssertionError e) { 22 | throw new RuntimeException("mandatory arguments missing or invalid") 23 | } catch(Exception e) { 24 | throw new RuntimeException("invalid parameter types\n${e}") 25 | } 26 | return true 27 | } 28 | -------------------------------------------------------------------------------- /modules/ChIPseq/GREAT.groovy: -------------------------------------------------------------------------------- 1 | GREAT = { 2 | doc title: "GREAT", 3 | desc: "Genomic Regions Enrichment Analysis", 4 | constraints: "", 5 | bpipe_version:"", 6 | author:"Giuseppe Petrosino" 7 | 8 | var subdir : "" 9 | output.dir = GREAT_vars.outdir + "/$subdir" 10 | 11 | def GREAT_FLAGS = 12 | (GREAT_vars.files ? " peakData=" + GREAT_vars.files + "/$subdir" : "") + 13 | (GREAT_vars.targets ? " targets=" + GREAT_vars.targets : "") + 14 | (GREAT_vars.outdir ? 
" out=" + GREAT_vars.outdir + "/$subdir" : "") + 15 | (GREAT_vars.padj ? " padj=" + GREAT_vars.padj : "") + 16 | (GREAT_vars.nterms ? " nterms=" + GREAT_vars.nterms : "") + 17 | (GREAT_vars.db ? " db=" + GREAT_vars.db : "") + 18 | (GREAT_vars.upstream ? " adv_upstream=" + GREAT_vars.upstream : "") + 19 | (GREAT_vars.downstream ? " adv_downstream=" + GREAT_vars.downstream : "") + 20 | (GREAT_vars.extra ? " " + GREAT_vars.extra : "") 21 | 22 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 23 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 24 | 25 | produce("GREAT.RData") { 26 | exec """ 27 | ${TOOL_ENV} && 28 | ${PREAMBLE} && 29 | 30 | Rscript ${PIPELINE_ROOT}/tools/GO_Enrichment/GREAT.R $GREAT_FLAGS 31 | ""","GREAT" 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /modules/ChIPseq/GREAT.header: -------------------------------------------------------------------------------- 1 | GREAT_vars=[ 2 | outdir : RESULTS + "/GREAT_analysis", 3 | files : RESULTS + "/macs2", 4 | targets : "targets.txt", // targets file describing the samples 5 | padj : "0.01", 6 | nterms : "5", 7 | db : ESSENTIAL_DB, 8 | upstream : "5", // 5 kb upstream of the TSS 9 | downstream: "1", // 1 kb downstream of the TSS 10 | extra : "" 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/ChIPseq/GREAT.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/ChIPseq/blacklist_filter.groovy: -------------------------------------------------------------------------------- 1 | blacklist_filter = { 2 | doc title: "blacklist_filter", 3 | desc: "Remove peaks overlapping blacklisted genomic regions", 4 | constraints: "", 5 | bpipe_version:"", 6 | author:"Giuseppe Petrosino, modified by Frank Ruehle" 7 | 8 | var subdir : "" 9 | var blacklist: blacklist_filter_vars.blacklist 10 | output.dir = blacklist_filter_vars.outdir + "/$subdir" 11 | 12 | println blacklist 13 | 14 | def BLACKLIST_FILTER_FLAGS = 15 | (blacklist_filter_vars.files ? " peakData=" + blacklist_filter_vars.files + "/$subdir" : "") + 16 | (blacklist ? " blacklistRegions=" + blacklist : "") + 17 | (blacklist_filter_vars.outdir ? " out=" + blacklist_filter_vars.outdir + "/$subdir" : "") + 18 | (blacklist_filter_vars.extra ? 
blacklist_filter_vars.extra : "") 19 | 20 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 21 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 22 | 23 | produce("BlackList_Filter.RData") { 24 | exec """ 25 | ${TOOL_ENV} && 26 | ${PREAMBLE} && 27 | 28 | Rscript ${PIPELINE_ROOT}/tools/BlackList_Filter/BlackList_Filter.R $BLACKLIST_FILTER_FLAGS; 29 | ""","blacklist_filter" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /modules/ChIPseq/blacklist_filter.header: -------------------------------------------------------------------------------- 1 | blacklist_filter_vars=[ 2 | outdir : RESULTS + "/macs2", 3 | blacklist: ESSENTIAL_BLACKLIST, 4 | files : RESULTS + "/macs2", 5 | extra : "" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/ChIPseq/blacklist_filter.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/ChIPseq/filbowtie2unique.header: -------------------------------------------------------------------------------- 1 | filbowtie2unique_vars=[ 2 | mapped : MAPPED, 3 | paired : RUN_IN_PAIRED_END_MODE, // run in se or pe mode 4 | samtools_mapq_pe : "10", // MAPQ for paired end data; >=3 should exclude "true multireads", multi mapped reads within the window of insert size 5 | samtools_mapq_se : "10", // MAPQ for single end data; 6 | samtools_threads : Integer.toString(ESSENTIAL_THREADS) 7 | ] 8 | 9 | dupremoval_vars=[ 10 | remove_pcr_dups : ESSENTIAL_DEDUPLICATION // added to remove PCR duplicates 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/ChIPseq/filbowtie2unique.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/ChIPseq/ipstrength.header: -------------------------------------------------------------------------------- 1 | ipstrength_vars=[ 2 | outdir : QC + "/ipstrength", 3 | targets : "targets.txt", // targets file describing the samples 4 | mapped : MAPPED, // where the bam files are stored 5 | bsgenome: ESSENTIAL_BSGENOME // a bioconductor BSgenome reference available in the R libPath() BSgenome.Hsapiens.UCSC.hg19 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/ChIPseq/ipstrength.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/ChIPseq/macs2.header: -------------------------------------------------------------------------------- 1 | macs2_vars=[ 2 | outdir : RESULTS + "/macs2", // output dir 3 | targets: "targets.txt", // targets file describing the samples 4 | gsize : ESSENTIAL_MACS2_GSIZE, // the mappable genome size 5 | minlen : ESSENTIAL_MIN_PEAKLENGTH, // MACS2 minimum peak length (default is fragment size). Could be increased if broad option is used. 
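// Illustrative sketch only, not part of the module: from the settings above, macs2.groovy
// assembles a MACS2 call conceptually similar to
//   macs2 callpeak -t IP.bam -c input.bam -g <gsize> --min-length <minlen> --keep-dup <dup> [--broad] [-f BAMPE]
// (file names and placeholders in angle brackets are hypothetical; the flag names follow the
// MACS2 command-line documentation, and the exact command is assembled in macs2.groovy)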
6 | broad : ESSENTIAL_MACS2_BROAD, // use broad setting for peak calling 7 | mapped : MAPPED, // where the bam files are stored 8 | paired : (ESSENTIAL_PAIRED == "yes"), // for PE data use fragments in peak calling 9 | extra : "--keep-dup " + ESSENTIAL_DUP // other parameters sent to macs2 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/ChIPseq/macs2.groovy" 13 | 14 | // expected parameter types 15 | class macs2_vars_schema { 16 | String outdir 17 | String targets 18 | String gsize 19 | String minlen 20 | Boolean broad 21 | String mapped 22 | Boolean paired 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && targets && mapped 28 | } 29 | } 30 | 31 | validate_schema(macs2_vars_schema, macs2_vars) 32 | -------------------------------------------------------------------------------- /modules/ChIPseq/make_greylist.header: -------------------------------------------------------------------------------- 1 | make_greylist_vars=[ 2 | outdir : RESULTS + "/greylist", 3 | karyoFile : ESSENTIAL_CHROMSIZES, // file with chromosome sizes 4 | targets : "targets.txt", // targets file. Check the bin directory for the format 5 | bams : MAPPED, // directory with the bam files 6 | peaks : RESULTS + "/macs2", // directory with peak caller output 7 | reps : "100", // The number of times to sample bins and estimate the parameters of the negative binomial distribution. 8 | sampleSize : "30000", // The number of bins to sample on each repetition. 9 | pThreshold : "0.99", // The p-value threshold for marking bins as “grey”. 10 | maxgap : "10000", // merge grey regions if distance below maxGap 11 | extra : "" 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/ChIPseq/make_greylist.groovy" 15 | 16 | -------------------------------------------------------------------------------- /modules/ChIPseq/normbigwig.header: -------------------------------------------------------------------------------- 1 | normbigwig_vars=[ 2 | outdir : TRACKS + "/input_normalised_cov", 3 | targets : "targets.txt", // targets file describing the samples 4 | extension_length: ESSENTIAL_FRAGLEN-ESSENTIAL_READLEN, //for paired end the pairs are automatically used. 
The extension length is only used if there are singletons 5 | mapped : MAPPED, 6 | threads : Integer.toString(ESSENTIAL_THREADS), 7 | extra : "--scaleFactorsMethod readCount --operation subtract " + "--extendReads " + Integer.toString(EXTENSION_LENGTH) + " --outFileFormat bedgraph" 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/ChIPseq/normbigwig.groovy" 11 | 12 | -------------------------------------------------------------------------------- /modules/ChIPseq/pbc.groovy: -------------------------------------------------------------------------------- 1 | pbc = { 2 | doc title: "PBC", 3 | desc: "PCR Bottleneck Coefficient", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | var subdir : "" 9 | output.dir = pbc_vars.outdir + "/$subdir" 10 | 11 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 12 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 13 | 14 | transform(".bam") to("_PBC.csv") { 15 | exec """ 16 | ${TOOL_ENV} && 17 | ${PREAMBLE} && 18 | 19 | Rscript ${PIPELINE_ROOT}/tools/ENCODEqc/PBC.R $input && mv ${input.prefix}_PBC.csv $output.dir 20 | ""","pbc" 21 | } 22 | 23 | forward input 24 | 25 | } 26 | -------------------------------------------------------------------------------- /modules/ChIPseq/pbc.header: -------------------------------------------------------------------------------- 1 | pbc_vars=[ 2 | outdir: QC + "/pbc" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/ChIPseq/pbc.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/ChIPseq/peak_annotation.header: -------------------------------------------------------------------------------- 1 | peak_annotation_vars=[ 2 | outdir : RESULTS + "/Peak_Annotation", 3 | files : RESULTS + "/macs2", 4 | transcript_type: "Bioconductor", 5 | transcript_db : ESSENTIAL_TXDB, // eg, TxDb.Mmusculus.UCSC.mm9.knownGene" 6 | orgdb : ESSENTIAL_ANNODB, // eg, org.Mm.eg.db 7 | regiontss : "3000", 8 | targets : "targets.txt", 9 | orderby : "group", // name of the column in targets.txt to order the libraries in plots 10 | extra : "" 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/ChIPseq/peak_annotation.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/ChIPseq/phantompeak.groovy: -------------------------------------------------------------------------------- 1 | phantompeak = { 2 | doc title: "Phantompeak QC plot", 3 | desc: "Phantompeak", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | var subdir : "" 9 | output.dir = phantompeak_vars.outdir + "/$subdir" 10 | 11 | def PHANTOMPEAK_FLAGS = 12 | (phantompeak_vars.minshift ? " " + phantompeak_vars.minshift : "") + 13 | (phantompeak_vars.maxshift ? " " + phantompeak_vars.maxshift : "") + 14 | (phantompeak_vars.binsize ? " " + phantompeak_vars.binsize : "") + 15 | (phantompeak_vars.readlen ? " " + phantompeak_vars.readlen : "") + 16 | (phantompeak_vars.threads ? " " + phantompeak_vars.threads : "") + 17 | (phantompeak_vars.extra ? 
" " + phantompeak_vars.extra : "") 18 | 19 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 20 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 21 | 22 | transform(".bam") to("_phantompeak.png") { 23 | exec """ 24 | ${TOOL_ENV} && 25 | ${PREAMBLE} && 26 | 27 | Rscript ${PIPELINE_ROOT}/tools/ENCODEqc/phantompeak.R $input \$(basename $input.prefix) $PHANTOMPEAK_FLAGS && 28 | 29 | mv \$(basename $input.prefix)_phantompeak.* $output.dir 30 | ""","phantompeak" 31 | } 32 | 33 | forward input 34 | } 35 | 36 | -------------------------------------------------------------------------------- /modules/ChIPseq/phantompeak.header: -------------------------------------------------------------------------------- 1 | phantompeak_vars=[ 2 | outdir : QC + "/phantompeak", 3 | threads : Integer.toString(ESSENTIAL_THREADS), // number of threads to use 4 | minshift: Integer.toString(-500), // left 'x' coordinate in plot 5 | maxshift: Integer.toString(1500), // right 'x' coordinate in plot 6 | binsize : Integer.toString(5), // stepsize for cc calculation 7 | readlen : Integer.toString(ESSENTIAL_READLEN), // read length 8 | extra : "" // extra parms to pass to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/ChIPseq/phantompeak.groovy" 12 | 13 | -------------------------------------------------------------------------------- /modules/ChIPseq/upsetPlot.groovy: -------------------------------------------------------------------------------- 1 | upsetPlot = { 2 | doc title: "upset plot", 3 | desc: "prepare combination matrix and UpSet Plot for peak data", 4 | constraints: "calculation of combination matrix may take hours for larger projects", 5 | bpipe_version:"tested with bpipe 0.9.9.8", 6 | author:"Frank Rühle" 7 | 8 | var subdir : "" 9 | output.dir = UPSET_vars.outdir + "/$subdir" 10 | 11 | def UPSET_FLAGS = 12 | (UPSET_vars.files ? " peakData=" + UPSET_vars.files + "/$subdir" : "") + 13 | (UPSET_vars.targets ? " targets=" + UPSET_vars.targets : "") + 14 | (UPSET_vars.outdir ? " out=" + UPSET_vars.outdir + "/$subdir" : "") + 15 | (UPSET_vars.mode ? " mode=" + UPSET_vars.mode : "") + 16 | (UPSET_vars.peakOverlapMode ? " peakOverlapMode=" + UPSET_vars.peakOverlapMode : "") + 17 | (UPSET_vars.setsize ? " setsize=" + UPSET_vars.setsize : "") + 18 | (UPSET_vars.addBarAnnotation ? " addBarAnnotation=" + UPSET_vars.addBarAnnotation : "") + 19 | (UPSET_vars.extra ? 
" " + UPSET_vars.extra : "") 20 | 21 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 22 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 23 | 24 | produce("upsetPlot.RData") { 25 | exec """ 26 | ${TOOL_ENV} && 27 | ${PREAMBLE} && 28 | 29 | Rscript ${PIPELINE_ROOT}/tools/upsetPlot/upsetPlot.R $UPSET_FLAGS; 30 | ""","upsetPlot" 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /modules/ChIPseq/upsetPlot.header: -------------------------------------------------------------------------------- 1 | UPSET_vars=[ 2 | outdir : RESULTS + "/upsetPlot", 3 | files : RESULTS + "/macs2", 4 | targets : "targets.txt", // targets file describing the samples 5 | mode : "intersect", 6 | peakOverlapMode : "peaknumber", 7 | setsize : "25", 8 | addBarAnnotation : false, 9 | extra : "" 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/ChIPseq/upsetPlot.groovy" 13 | 14 | -------------------------------------------------------------------------------- /modules/DNAseq/bwa.header: -------------------------------------------------------------------------------- 1 | BWA_vars=[ 2 | outdir : MAPPED, 3 | ref : ESSENTIAL_BWA_REF, 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | 6 | // additional settings 7 | samtools_threads: Integer.toString(ESSENTIAL_THREADS), 8 | extra : "" // extra parms sent to the tool 9 | ] 10 | 11 | 12 | load PIPELINE_ROOT + "/modules/DNAseq/bwa.groovy" 13 | 14 | // expected parameter types 15 | class BWA_vars_schema { 16 | String outdir 17 | String ref 18 | String threads 19 | String samtools_threads 20 | String extra 21 | 22 | // check for the presence of mandatory params 23 | boolean asBoolean() { 24 | outdir && ref 25 | } 26 | } 27 | 28 | validate_schema(BWA_vars_schema, BWA_vars) 29 | -------------------------------------------------------------------------------- /modules/DNAseq/collectVariantCallingMetrics.header: -------------------------------------------------------------------------------- 1 | CollectVariantCallingMetrics_vars=[ 2 | outdir : QC + "/GATK_VariantCallingMetrics", 3 | java_flags : "-Xmx20000m", 4 | bwa_ref : ESSENTIAL_BWA_REF, 5 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/collectVariantCallingMetrics.groovy" 10 | 11 | // metrics explained here: https://broadinstitute.github.io/picard/picard-metric-definitions.html 12 | -------------------------------------------------------------------------------- /modules/DNAseq/gatherBQSRReports.groovy: -------------------------------------------------------------------------------- 1 | GatherBQSRReports = { 2 | doc title: "GATK GatherBQSRReports", 3 | desc: "This tool gathers scattered BQSR recalibration reports into a single file.", 4 | constraints: "", 5 | bpipe_version: "tested with 0.9.9.8.slurm", 6 | author: "Frank Rühle" 7 | 8 | output.dir = GatherBQSRReports_vars.outdir 9 | 10 | def GatherBQSRReports_vars_FLAGS = 11 | (GatherBQSRReports_vars.extra ? 
" " + GatherBQSRReports_vars.extra : "" ) 12 | 13 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 14 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix.prefix).getName()) 16 | 17 | produce("gatheredBQSR.report") { 18 | 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | gatk --java-options "${GatherBQSRReports_vars.java_flags}" GatherBQSRReports $GatherBQSRReports_vars_FLAGS --tmp-dir \${TMP} -O $output \$(for f in \$(ls ${output.dir}/*.table);do echo " -I " "\$f"; done) 24 | ""","GatherBQSRReports" 25 | } 26 | forward input 27 | } 28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/DNAseq/gatherBQSRReports.header: -------------------------------------------------------------------------------- 1 | GatherBQSRReports_vars=[ 2 | outdir : BaseRecalibration_vars.statsdir, 3 | java_flags : "-Xmx20000m", 4 | extra : "" 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/DNAseq/gatherBQSRReports.groovy" 8 | 9 | 10 | -------------------------------------------------------------------------------- /modules/DNAseq/genomicsDBImport.header: -------------------------------------------------------------------------------- 1 | GenomicsDBImport_vars=[ 2 | outdir : RESULTS, 3 | workspace_name: "GenomicsDBImport", 4 | java_flags : "-Xmx4g -Xms4g", 5 | call_region : ESSENTIAL_CALL_REGION, 6 | sample_map : "sample.map", // will be generated from targets file 7 | extra : "" 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/DNAseq/genomicsDBImport.groovy" 11 | 12 | 13 | -------------------------------------------------------------------------------- /modules/DNAseq/genotypeGVCFs.groovy: -------------------------------------------------------------------------------- 1 | GenotypeGVCFs = { 2 | doc title: "GATK GenotypeGVCFs", 3 | desc: "Perform joint genotyping on single input file containing one or more samples pre-called with HaplotypeCaller", 4 | constraints: "input sample file must possess genotype likelihoods produced by HaplotypeCaller with '-ERC GVCF' or '-ERC BP_RESOLUTION'", 5 | bpipe_version: "tested with bpipe 0.9.9.8.slurm", 6 | author: "Frank Rühle" 7 | 8 | output.dir = GenotypeGVCFs_vars.outdir 9 | 10 | def GenotypeGVCFs_FLAGS = 11 | (GenotypeGVCFs_vars.bwa_ref ? " -R " + GenotypeGVCFs_vars.bwa_ref : "" ) + 12 | (GenotypeGVCFs_vars.extra ? 
" " + GenotypeGVCFs_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 15 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1).getName()) 17 | 18 | println "Used workspace from GenomicsDBImport: " + input.dir 19 | 20 | produce(GenotypeGVCFs_vars.vcf_name) { 21 | exec """ 22 | ${TOOL_ENV} && 23 | ${PREAMBLE} && 24 | 25 | gatk --java-options "${GenotypeGVCFs_vars.java_flags}" GenotypeGVCFs $GenotypeGVCFs_FLAGS --tmp-dir \${TMP} -V gendb://$input.dir -O $output 26 | 27 | ""","GenotypeGVCFs" 28 | } 29 | } 30 | 31 | 32 | -------------------------------------------------------------------------------- /modules/DNAseq/genotypeGVCFs.header: -------------------------------------------------------------------------------- 1 | GenotypeGVCFs_vars=[ 2 | outdir : RESULTS + "/GenotypeGVCFs", 3 | vcf_name :"joinedsamples.vcf.gz", 4 | java_flags : "-Xmx4g", 5 | bwa_ref : ESSENTIAL_BWA_REF, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/genotypeGVCFs.groovy" 10 | 11 | 12 | -------------------------------------------------------------------------------- /modules/DNAseq/realignment.header: -------------------------------------------------------------------------------- 1 | IndelRealignment_vars=[ 2 | outdir : MAPPED, 3 | java_flags : "-Xmx5000m", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | mills_variants: ESSENTIAL_MILLS_VARIANTS, 6 | bwa_ref : ESSENTIAL_BWA_REF 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/realignment.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/DNAseq/recalibration.header: -------------------------------------------------------------------------------- 1 | BaseRecalibration_vars=[ 2 | outdir : MAPPED, 3 | statsdir : QC + "/BQSR", 4 | bwa_ref : ESSENTIAL_BWA_REF, 5 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/recalibration.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/DNAseq/snpEff.header: -------------------------------------------------------------------------------- 1 | snpEff_vars=[ 2 | outdir : RESULTS + "/snpEff", 3 | java_flags : "-Xmx8g", 4 | config : "", // specify if not default 5 | genome_version : ESSENTIAL_SNPEFF_GENOME, // the corresponding snpEff database 6 | output_format : "gatk", // either of vcf, gatk, bed, bedAnn 7 | extra : "" 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/DNAseq/snpEff.groovy" 11 | 12 | 13 | -------------------------------------------------------------------------------- /modules/DNAseq/validateVariants.groovy: -------------------------------------------------------------------------------- 1 | ValidateVariants = { 2 | doc title: "GATK ValidateVariants", 3 | desc: "This tool validates the adherence of a file to VCF format including information contained within the fields REF, CHR_COUNTS, IDS, ALLELES.", 4 | constraints: "", 5 | bpipe_version: "tested with 0.9.9.8.slurm", 6 | author: "Frank Rühle" 7 | 8 | output.dir = ValidateVariants_vars.outdir 9 | 10 | def ValidateVariants_vars_FLAGS = 11 | (ValidateVariants_vars.bwa_ref ? " -R " + ValidateVariants_vars.bwa_ref : "" ) + 12 | (ValidateVariants_vars.known_variants ? " --dbsnp " + ValidateVariants_vars.known_variants : "" ) + 13 | (ValidateVariants_vars.extra ? 
" " + ValidateVariants_vars.extra : "" ) 14 | 15 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 16 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix.prefix).getName()) 18 | 19 | transform (".vcf.gz") to (".report") { 20 | 21 | exec """ 22 | ${TOOL_ENV} && 23 | ${PREAMBLE} && 24 | 25 | gatk --java-options "${ValidateVariants_vars.java_flags}" ValidateVariants $ValidateVariants_vars_FLAGS -V $input > $output 26 | ""","ValidateVariants" 27 | } 28 | forward input 29 | } 30 | 31 | -------------------------------------------------------------------------------- /modules/DNAseq/validateVariants.header: -------------------------------------------------------------------------------- 1 | ValidateVariants_vars=[ 2 | outdir : QC + "/GATK_ValidateVariants", 3 | java_flags : "-Xmx20000m", 4 | bwa_ref : ESSENTIAL_BWA_REF, 5 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/validateVariants.groovy" 10 | 11 | 12 | -------------------------------------------------------------------------------- /modules/DNAseq/variantFiltration.header: -------------------------------------------------------------------------------- 1 | VariantFiltration_vars=[ 2 | outdir : RESULTS + "/VariantFiltration", 3 | bwa_ref : ESSENTIAL_BWA_REF, 4 | java_flags : "-Xmx8g", 5 | 6 | // Note that these expressions will tag any sites as failing the filter where the value matches the expression. 7 | // For explanations of filter criteria see https://gatk.broadinstitute.org/hc/en-us/articles/360035890471 8 | snp_filter_QD : "QD < 2.0", 9 | snp_filter_QUAL : "", 10 | snp_filter_SOR : "SOR > 3.0", 11 | snp_filter_FS : "FS > 60.0", 12 | snp_filter_MQ : "MQ < 40.0", 13 | snp_filter_MQRankSum : "MQRankSum < -12.5", 14 | snp_filter_ReadPosRankSum : "ReadPosRankSum < -8.0", 15 | 16 | indel_filter_QD : "QD < 2.0", 17 | indel_filter_QUAL : "", 18 | indel_filter_SOR : "SOR > 10.0", 19 | indel_filter_FS : "FS > 200.0", 20 | indel_filter_MQ : "", 21 | indel_filter_MQRankSum : "", 22 | indel_filter_ReadPosRankSum : "ReadPosRankSum < -20.0" 23 | ] 24 | 25 | load PIPELINE_ROOT + "/modules/DNAseq/variantFiltration.groovy" 26 | 27 | -------------------------------------------------------------------------------- /modules/DNAseq/variant_score_recalibration.header: -------------------------------------------------------------------------------- 1 | VariantScoreRecalibration_vars=[ 2 | outdir : RESULTS + "/VQSR", 3 | java_flags : "-Xmx24g -Xms24g", 4 | known_variants : ESSENTIAL_KNOWN_VARIANTS, 5 | hapmap_variants : ESSENTIAL_HAPMAP_VARIANTS, 6 | omni_variants : ESSENTIAL_OMNI_VARIANTS, 7 | mills_variants : ESSENTIAL_MILLS_VARIANTS, 8 | thousand_genomes_variants: ESSENTIAL_THOUSAND_GENOMES_VARIANTS, 9 | snp_filter_level : "99.7", 10 | indel_filter_level : "99.7", 11 | max_gaussians_indels : 4, 12 | max_gaussians_snps : 6, 13 | bwa_ref : ESSENTIAL_BWA_REF 14 | ] 15 | 16 | load PIPELINE_ROOT + "/modules/DNAseq/variant_score_recalibration.groovy" 17 | 18 | -------------------------------------------------------------------------------- /modules/DNAseq/variantcallHC.groovy: -------------------------------------------------------------------------------- 1 | VariantCallHC = { 2 | doc title: "GATK Variant Calling HC", 3 | desc: "Call variants in BAM files using GATK HaplotypeCaller", 4 | constraints: "Requires BWA ( 
parameter -M ) produced BAM file, with correct chromosome order and ReadGroup attached.", 5 | bpipe_version: "tested with bpipe 0.9.9.8.slurm", 6 | author: "Oliver Drechsel, modified by Frank Rühle" 7 | 8 | output.dir = VariantCallHC_vars.outdir 9 | 10 | def HaplotypeCaller_FLAGS = 11 | (VariantCallHC_vars.erc ? " -ERC " + VariantCallHC_vars.erc : "" ) + 12 | (VariantCallHC_vars.call_region ? " -L " + VariantCallHC_vars.call_region : "" ) + 13 | (VariantCallHC_vars.bwa_ref ? " -R " + VariantCallHC_vars.bwa_ref : "" ) + 14 | (VariantCallHC_vars.known_variants ? " --dbsnp " + VariantCallHC_vars.known_variants : "" ) + 15 | (VariantCallHC_vars.extra ? " " + VariantCallHC_vars.extra : "" ) 16 | 17 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 18 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | transform (".bam") to (".g.vcf.gz") { 22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | gatk --java-options "${VariantCallHC_vars.java_flags}" HaplotypeCaller $HaplotypeCaller_FLAGS -I $input -O $output 27 | 28 | ""","VariantCallHC" 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /modules/DNAseq/variantcallHC.header: -------------------------------------------------------------------------------- 1 | VariantCallHC_vars=[ 2 | outdir : RESULTS + "/VariantCallHC", 3 | java_flags : "-Xmx20000m", 4 | erc : "GVCF", // Mode for emitting reference confidence scores (NONE, "BP_RESOLUTION" or "GVCF") 5 | call_region : ESSENTIAL_CALL_REGION, 6 | bwa_ref : ESSENTIAL_BWA_REF, 7 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 8 | extra : "" 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/DNAseq/variantcallHC.groovy" 12 | 13 | -------------------------------------------------------------------------------- /modules/DNAseq/variantcallUG.groovy: -------------------------------------------------------------------------------- 1 | VariantCallUG = { 2 | doc title: "GATK Variant Calling UG", 3 | desc: "Call variants in BAM files using GATK UnifiedGenotyper", 4 | constraints: "Requires BWA ( parameter -M ) produced BAM file, with correct chromosome order and ReadGroup attached.", 5 | bpipe_version: "tested with bpipe 0.9.9.3.slurm", 6 | author: "Oliver Drechsel" 7 | 8 | output.dir = VariantCallUG_vars.outdir 9 | 10 | def UnifiedGenotyper_FLAGS = 11 | " -glm BOTH " + 12 | (VariantCallUG_vars.call_region ? " -L " + VariantCallUG_vars.call_region : "" ) + 13 | (VariantCallUG_vars.threads ? " -nt " + VariantCallUG_vars.threads : "" ) + // this is not a typo! 14 | (VariantCallUG_vars.threads ? " -nct " + VariantCallUG_vars.threads : "" ) + // check tool multithread options 15 | (VariantCallUG_vars.bwa_ref ? 
" -R " + VariantCallUG_vars.bwa_ref : "" ) 16 | 17 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 18 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | transform (".dupmarked.realigned.recalibrated.bam") to (".UG.vcf.gz") { 22 | // usage parameters https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_genotyper_UnifiedGenotyper.php 23 | exec """ 24 | ${TOOL_ENV} && 25 | ${PREAMBLE} && 26 | 27 | java ${VariantCallUG_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T UnifiedGenotyper $UnifiedGenotyper_FLAGS -I $input -o $output 28 | ""","VariantCallUG" 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /modules/DNAseq/variantcallUG.header: -------------------------------------------------------------------------------- 1 | VariantCallUG_vars=[ 2 | outdir : RESULTS, 3 | java_flags : "-Xmx20000m", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | call_region : ESSENTIAL_CALL_REGION, 6 | bwa_ref : ESSENTIAL_BWA_REF 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/variantcallUG.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/DNAseq/varianteval.groovy: -------------------------------------------------------------------------------- 1 | VariantEval = { 2 | doc title: "GATK VariantEval", 3 | desc: "Variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)", 4 | constraints: "VariantEval is a BETA tool and is not yet ready for use in production", 5 | bpipe_version: "tested with 0.9.9.8.slurm", 6 | author: "Oliver Drechsel, modified by Frank Rühle" 7 | 8 | output.dir = VariantEval_vars.outdir 9 | 10 | def VariantEval_FLAGS = 11 | (VariantEval_vars.bwa_ref ? " -R " + VariantEval_vars.bwa_ref : "" ) + 12 | (VariantEval_vars.known_variants ? " --dbsnp " + VariantEval_vars.known_variants : "" ) + 13 | (VariantEval_vars.extra ? 
" " + VariantEval_vars.extra : "" ) 14 | 15 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 16 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix.prefix).getName()) 18 | 19 | transform (".vcf.gz") to (".report") { 20 | 21 | exec """ 22 | ${TOOL_ENV} && 23 | ${PREAMBLE} && 24 | 25 | gatk --java-options "${VariantEval_vars.java_flags}" VariantEval $VariantEval_FLAGS --eval $input -O $output 26 | ""","VariantEval" 27 | } 28 | forward input 29 | } 30 | 31 | -------------------------------------------------------------------------------- /modules/DNAseq/varianteval.header: -------------------------------------------------------------------------------- 1 | VariantEval_vars=[ 2 | outdir : QC + "/GATK_varianteval", 3 | java_flags : "-Xmx20000m", 4 | bwa_ref : ESSENTIAL_BWA_REF, 5 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/varianteval.groovy" 10 | 11 | // usage parameters https://gatk.broadinstitute.org/hc/en-us/articles/4418051376155-VariantEval-BETA- 12 | -------------------------------------------------------------------------------- /modules/DNAseq/variantfuseHC.header: -------------------------------------------------------------------------------- 1 | HaplotypeCaller_vars=[ 2 | outdir : RESULTS, 3 | java_flags : "-Xmx20000m", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | bwa_ref : ESSENTIAL_BWA_REF, 6 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 7 | refconf : "--emitRefConfidence GVCF", 8 | indextype : "--variant_index_type LINEAR", 9 | indexparm : "-variant_index_parameter 128000", 10 | extra : "" 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/DNAseq/variantfuseHC.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/NGS/bam2bw.groovy: -------------------------------------------------------------------------------- 1 | bam2bw = { 2 | doc title: "bam2bw", 3 | desc: "Convert BAM file to bigWig", 4 | constraints: "none.", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir=bam2bw_vars.outdir 9 | 10 | def TOOL_ENV = prepare_tool_env("bedtools", tools["bedtools"]["version"], tools["bedtools"]["runenv"]) + " && " + 11 | prepare_tool_env("kentutils", tools["kentutils"]["version"], tools["kentutils"]["runenv"]) + " && " + 12 | prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | transform(".bam") to ("_scaled.bw") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | BASEOUTPUT=`basename $output` && 21 | CHRSIZES=\${TMP}/\$(basename ${input.prefix}).bam2bw.chrsizes && 22 | samtools idxstats ${input} | cut -f1-2 > \${CHRSIZES} && 23 | TOTAL_MAPPED=\$( samtools flagstat $input | head -n5 | tail -n1 | cut -f1 -d" ") && 24 | SCALE=\$(echo "1000000/\$TOTAL_MAPPED" | bc -l) && 25 | genomeCoverageBed -bg -split -scale \${SCALE} -ibam ${input} | sortBed -i - > \${TMP}/\${BASEOUTPUT%.bw}.bedgraph && 26 | bedGraphToBigWig \${TMP}/\${BASEOUTPUT%.bw}.bedgraph \${CHRSIZES} \${TMP}/\${BASEOUTPUT} && 27 | cp -f \${TMP}/\${BASEOUTPUT} $output 28 | ""","bam2bw" 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /modules/NGS/bam2bw.header: 
-------------------------------------------------------------------------------- 1 | bam2bw_vars=[ 2 | outdir: TRACKS 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/NGS/bam2bw.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/NGS/bamcoverage.groovy: -------------------------------------------------------------------------------- 1 | bamCoverage = { 2 | doc title: "bamCoverage", 3 | desc: "bamCoverage wrapper", 4 | constraints: "normalised bigWig track for RNA-seq/ChIP-seq PE data", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | var subdir : "" 9 | output.dir = bamCoverage_vars.outdir + "/$subdir" 10 | 11 | def BAMCOVERAGE_FLAGS = 12 | (bamCoverage_vars.cores ? " --numberOfProcessors " + bamCoverage_vars.cores : "") + 13 | (bamCoverage_vars.fragments ? " --extendReads " + (bamCoverage_vars.paired ? "" : bamCoverage_vars.fraglength + " ") : "") + 14 | (bamCoverage_vars.extra ? " " + bamCoverage_vars.extra : "") 15 | 16 | def TOOL_ENV = prepare_tool_env("deeptools", tools["deeptools"]["version"], tools["deeptools"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 18 | 19 | transform(".bam") to(".bw") { 20 | exec """ 21 | ${TOOL_ENV} && 22 | ${PREAMBLE} && 23 | 24 | bamCoverage $BAMCOVERAGE_FLAGS --bam $input -o ${output}; 25 | ""","bamCoverage" 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /modules/NGS/bamcoverage.header: -------------------------------------------------------------------------------- 1 | bamCoverage_vars=[ 2 | outdir : TRACKS, 3 | cores : Integer.toString(ESSENTIAL_THREADS), 4 | paired : RUN_IN_PAIRED_END_MODE, // run in se or pe mode 5 | fragments : (binding.hasVariable('ESSENTIAL_FRAGMENT_USAGE') && binding.hasVariable('ESSENTIAL_FRAGLEN') ? ESSENTIAL_FRAGMENT_USAGE == "yes" : false), 6 | fraglength: (binding.hasVariable('ESSENTIAL_FRAGLEN') ? Integer.toString(ESSENTIAL_FRAGLEN) : "200"), 7 | // If you want to exclude chromosomes for normalisation e.g. rDNA or mitochondrion add 8 | // the following parameter --ignoreForNormalization \"chrM, rDNA\".
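// Illustrative example only (the value is project-specific, typically set in essential.vars.groovy;
// it is not prescribed by this module):
//   ESSENTIAL_BAMCOVERAGE = "--normalizeUsing CPM --binSize 25 --ignoreForNormalization chrX chrM"
// --normalizeUsing, --binSize and --ignoreForNormalization are documented deepTools bamCoverage options.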
9 | // If you like to use offsets, blacklist regions, center reads or anything like it please 10 | // refer to the deepTools manual, there is even a special modus for Nucleosome detection in Mnase data 11 | // for deeptools versions >v3 you have to use --normalizeUsing RPKM since the API changed 12 | extra : "--outFileFormat bigwig" + " " + ESSENTIAL_BAMCOVERAGE 13 | ] 14 | 15 | load PIPELINE_ROOT + "/modules/NGS/bamcoverage.groovy" 16 | 17 | // expected parameter types 18 | class bamCoverage_vars_schema { 19 | String outdir 20 | String cores 21 | Boolean paired 22 | Boolean fragments 23 | String fraglength 24 | String extra 25 | 26 | // check for the presence of mandatory params 27 | boolean asBoolean() { 28 | outdir 29 | } 30 | } 31 | 32 | validate_schema(bamCoverage_vars_schema, bamCoverage_vars) 33 | -------------------------------------------------------------------------------- /modules/NGS/bamindexer.groovy: -------------------------------------------------------------------------------- 1 | BAMindexer = { 2 | doc title: "BAMindexer", 3 | desc: "Call samtools to index a bam file", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols, Nastasja Kreim" 7 | 8 | def File f = new File(input1) 9 | output.dir = f.getParent() 10 | 11 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 12 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 13 | 14 | transform(".bam\$") to(".bam.bai") { 15 | exec """ 16 | ${TOOL_ENV} && 17 | ${PREAMBLE} && 18 | 19 | samtools index $input 20 | ""","BAMindexer" 21 | } 22 | 23 | forward input 24 | } 25 | 26 | -------------------------------------------------------------------------------- /modules/NGS/bamindexer.header: -------------------------------------------------------------------------------- 1 | BAMindexer_vars=[ 2 | ] 3 | 4 | load PIPELINE_ROOT + "/modules/NGS/bamindexer.groovy" 5 | 6 | -------------------------------------------------------------------------------- /modules/NGS/bamqc.groovy: -------------------------------------------------------------------------------- 1 | BamQC = { 2 | doc title: "BamQC", 3 | desc: "Quality control of bam file", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Giuseppe Petrosino" 7 | 8 | output.dir = BamQC_vars.outdir 9 | def BAMQC_FLAGS = 10 | (BamQC_vars.extra ? 
" " + BamQC_vars.extra : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("bamqc", tools["bamqc"]["version"], tools["bamqc"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | transform(".bam") to ("_bamqc.zip") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | bamqc $BAMQC_FLAGS -o $output.dir $input 21 | ""","BamQC" 22 | } 23 | 24 | forward input 25 | } 26 | -------------------------------------------------------------------------------- /modules/NGS/bamqc.header: -------------------------------------------------------------------------------- 1 | BamQC_vars=[ 2 | outdir: QC + "/bamqc", 3 | extra : "--extract --quiet" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/bamqc.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/downsamplebam.groovy: -------------------------------------------------------------------------------- 1 | DownsampleBAM = { 2 | doc title: "DownsampleBAM", 3 | desc: "Call samtools tools to downsample a given bam file to roughly a given number of mapped reads", 4 | constraints: "Samtools tools version >= 1.3", 5 | bpipe_version: "tested with bpipe 0.9.9.5", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = DownsampleBAM_vars.outdir 9 | 10 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".bam") to (".down.bam") { 14 | exec """ 15 | ${TOOL_ENV} && 16 | ${PREAMBLE} && 17 | 18 | BASE=\$(basename $input) && 19 | samtools view -F 0x04 -bh $input -o \${TMP}/\${BASE}_mapped.bam && 20 | TOTAL_MAPPED=\$(samtools flagstat \${TMP}/\${BASE}_mapped.bam | grep mapped | head -n 1 | awk '{print \$1 }') && 21 | echo mapped_info \$TOTAL_MAPPED && 22 | if [[ ${DownsampleBAM_vars.amount} > \$TOTAL_MAPPED ]]; then 23 | echo "Downsample amount higher than amount of mapped reads. Keeping all reads!" 
&& 24 | cp \${TMP}/\${BASE}_mapped.bam $output; 25 | else 26 | PROBABILITY=\$(echo "${DownsampleBAM_vars.seed} + ${DownsampleBAM_vars.amount}/\$TOTAL_MAPPED" | bc -l); 27 | echo Probability \$PROBABILITY && 28 | samtools view -bs \$PROBABILITY -o $output \${TMP}/\${BASE}_mapped.bam; 29 | fi 30 | ""","DownsampleBAM" 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /modules/NGS/downsamplebam.header: -------------------------------------------------------------------------------- 1 | DownsampleBAM_vars=[ 2 | outdir: MAPPED, 3 | amount: 100000, 4 | seed : 1 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/NGS/downsamplebam.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/NGS/downsamplefastqPE.groovy: -------------------------------------------------------------------------------- 1 | DownsamplefastqPE = { 2 | doc title: "downsample", 3 | desc: "downsample wrapper for fastq files (paired end)", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = DownsamplefastqPE_vars.outdir 9 | def OUTPUTFILES = new ArrayList() 10 | inputs.eachWithIndex { item, index -> 11 | File f = new File(item) 12 | OUTPUTFILES.add((f.getName() =~ /.fastq.gz/).replaceFirst(".down.fastq.gz")) 13 | println OUTPUTFILES[index] 14 | } 15 | 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | produce(OUTPUTFILES){ 19 | exec """ 20 | paste <(zcat $input1) <(zcat $input2) | awk '{ printf("%s",\$0); n++; if(n%4==0) { printf("\\n");} else { printf("\\t\\t");} }' | shuf | head -n ${DownsamplefastqPE_vars.amount} | sed 's/\\t\\t/\\n/g' | awk -v r1=$output1.prefix -v r2=$output2.prefix 'BEGIN {FS="\\t"}{print \$1 >r1; print \$2>r2 }' && 21 | gzip $output1.prefix && 22 | gzip $output2.prefix; 23 | ""","Downsamplefastq" 24 | } 25 | } 26 | 27 | -------------------------------------------------------------------------------- /modules/NGS/downsamplefastqPE.header: -------------------------------------------------------------------------------- 1 | DownsamplefastqPE_vars=[ 2 | outdir: PROJECT + "/downsampled", 3 | amount: 2500000 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/downsamplefastqPE.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/downsamplefastqSE.groovy: -------------------------------------------------------------------------------- 1 | DownsamplefastqSE = { 2 | doc title: "downsample", 3 | desc: "downsample wrapper for fastq files (single end)", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = DownsamplefastqSE_vars.outdir 9 | def OUTPUTFILES = new ArrayList() 10 | inputs.eachWithIndex { item, index -> 11 | File f = new File(item) 12 | OUTPUTFILES.add((f.getName() =~ /.fastq.gz/).replaceFirst(".down.fastq.gz")) 13 | OUTPUTFILES.add(item) 14 | println OUTPUTFILES[index] 15 | } 16 | 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 18 | 19 | produce(OUTPUTFILES) { 20 | exec """ 21 | paste <(zcat $input) | awk '{ printf("%s",\$0); n++; if(n%4==0) { printf("\\n");} else { printf("\\t\\t");} }' | shuf | head -n ${DownsamplefastqSE_vars.amount} | sed 's/\\t\\t/\\n/g' | awk -v r1=$output1.prefix 'BEGIN {FS="\\t"}{print \$1 >r1}' && 22 | gzip $output1.prefix 23 | ""","Downsamplefastq" 24 | } 25 | } 26 | 27 
26 | -------------------------------------------------------------------------------- /modules/NGS/downsamplefastqSE.header: -------------------------------------------------------------------------------- 1 | DownsamplefastqSE_vars=[ 2 | outdir: PROJECT + "/downsampled", 3 | amount: 100000 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/downsamplefastqSE.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/extend.groovy: -------------------------------------------------------------------------------- 1 | extend = { 2 | doc title: "extend", 3 | desc: "Extend read length to the average fragment size", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir=extend_vars.outdir 9 | 10 | def SAMTOOLS_SORT_FLAGS = "-O bam -@ " + extend_vars.samtools_threads 11 | 12 | def TOOL_ENV = prepare_tool_env("bedtools", tools["bedtools"]["version"], tools["bedtools"]["runenv"]) + " && " + 13 | prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 14 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 15 | 16 | transform(".bam") to ("_ext.bam") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | CHRSIZES="\${TMP}/\$(basename ${input.prefix}).extend.chrsizes" && 22 | samtools idxstats ${input} | cut -f1-2 > "\${CHRSIZES}" && 23 | bedtools bamtobed -split -i $input | bedtools slop -g "\${CHRSIZES}" -l 0 -r ${extend_vars.fraglen} -s | bedtools bedtobam -ubam -g "\${CHRSIZES}" | samtools sort $SAMTOOLS_SORT_FLAGS -T \${TMP}/\$(basename $output.prefix) - > $output && 24 | samtools index $output 25 | ""","extend" 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /modules/NGS/extend.header: -------------------------------------------------------------------------------- 1 | extend_vars=[ 2 | outdir : MAPPED, 3 | fraglen : ESSENTIAL_FRAGLEN - ESSENTIAL_READLEN, //extension length: the average fragment length minus the read length 4 | samtools_threads: Integer.toString(ESSENTIAL_THREADS) 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/NGS/extend.groovy" 8 | 9 | // expected parameter types 10 | class extend_vars_schema { 11 | String outdir 12 | Integer fraglen 13 | String samtools_threads 14 | 15 | // check for the presence of mandatory params 16 | boolean asBoolean() { 17 | outdir && fraglen > 0 18 | } 19 | } 20 | 21 | validate_schema(extend_vars_schema, extend_vars)
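This header shows the schema/validation pattern used throughout the repo: the `*_vars_schema` class declares the expected type of every key, and `asBoolean()` encodes which parameters are mandatory, so a bad setting fails when the pipeline is loaded rather than hours into a run. A small sketch of what it catches (`validate_schema` itself lives in config/validate_module_params.groovy; the bad map below is hypothetical):

    // a String where the schema declares an Integer fraglen:
    def bad_vars = [outdir: MAPPED, fraglen: "180bp", samtools_threads: "4"]
    validate_schema(extend_vars_schema, bad_vars)   // fails at load time with a type mismatch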
" " + FastQC_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("fastqc", tools["fastqc"]["version"], tools["fastqc"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | transform("*.fastq.gz") to ("_fastqc.zip") { 18 | exec """ 19 | ${TOOL_ENV} && 20 | ${PREAMBLE} && 21 | 22 | fastqc --extract $FASTQC_FLAGS -o $output.dir $inputs 23 | ""","FastQC" 24 | } 25 | 26 | forward inputs 27 | } 28 | -------------------------------------------------------------------------------- /modules/NGS/fastqc.header: -------------------------------------------------------------------------------- 1 | FastQC_vars=[ 2 | outdir: QC + "/fastqc", 3 | extra : "--quiet" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/fastqc.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/fastqscreen.groovy: -------------------------------------------------------------------------------- 1 | FastqScreen = { 2 | doc title: "FastqScreen", 3 | desc: "Quality control of input file against various contaminants", 4 | constraints: "Only supports compressed FASTQ files", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim, modified by Frank Ruehle" 7 | 8 | var subdir : "" 9 | output.dir = FastqScreen_vars.outdir + "/$subdir" 10 | def FASTQSCREEN_FLAGS = 11 | (FastqScreen_vars.threads ? " --threads " + FastqScreen_vars.threads : "") + 12 | (FastqScreen_vars.extra ? " " + FastqScreen_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("fastqscreen", tools["fastqscreen"]["version"], tools["fastqscreen"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | transform("*.fastq.gz") to("_fastqscreen.done") { 18 | exec """ 19 | ${TOOL_ENV} && 20 | ${PREAMBLE} && 21 | 22 | if [ ! -e "$output.prefix" ]; then 23 | mkdir $output.prefix; 24 | fi && 25 | fastqreference=${FastqScreen_vars.conf}; 26 | references=(\${fastqreference//,/ }); 27 | for i in "\${!references[@]}"; do 28 | reference=(\${references[i]//::/ }); 29 | echo -e "DATABASE\t\${reference[0]}\t\${reference[1]}" >> $output.prefix/fastqscreen.conf; 30 | done; 31 | fastq_screen $FASTQSCREEN_FLAGS --conf $output.prefix/fastqscreen.conf --outdir $output.prefix $inputs; 32 | touch $outputs 33 | ""","FastqScreen" 34 | } 35 | 36 | forward inputs 37 | } 38 | 39 | -------------------------------------------------------------------------------- /modules/NGS/fastqscreen.header: -------------------------------------------------------------------------------- 1 | FastqScreen_vars=[ 2 | outdir : QC + "/fastqscreen", 3 | threads: Integer.toString(ESSENTIAL_THREADS), 4 | //the fastqscreen_conf defines your references, with these we will create a fastqscreen conf script and then run the fastqscreen 5 | //this could be e.g. 6 | conf : ESSENTIAL_FASTQSCREEN, 7 | //fastqscreen additional param e.g. subset or bowtie /bowtie 2 parameters 8 | extra : "--nohits --subset 100000" 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/NGS/fastqscreen.groovy" 12 | 13 | -------------------------------------------------------------------------------- /modules/NGS/filterchromosomes.groovy: -------------------------------------------------------------------------------- 1 | FilterChr = { 2 | doc title: "FilterChr", 3 | desc: "When mapping to full genome, including unassembled contigs, remove those extra contiguous before proceeding for further analysis. 
8 | -------------------------------------------------------------------------------- /modules/NGS/fastqscreen.groovy: -------------------------------------------------------------------------------- 1 | FastqScreen = { 2 | doc title: "FastqScreen", 3 | desc: "Quality control of input file against various contaminants", 4 | constraints: "Only supports compressed FASTQ files", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim, modified by Frank Ruehle" 7 | 8 | var subdir : "" 9 | output.dir = FastqScreen_vars.outdir + "/$subdir" 10 | def FASTQSCREEN_FLAGS = 11 | (FastqScreen_vars.threads ? " --threads " + FastqScreen_vars.threads : "") + 12 | (FastqScreen_vars.extra ? " " + FastqScreen_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("fastqscreen", tools["fastqscreen"]["version"], tools["fastqscreen"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | transform("*.fastq.gz") to("_fastqscreen.done") { 18 | exec """ 19 | ${TOOL_ENV} && 20 | ${PREAMBLE} && 21 | 22 | if [ ! -e "$output.prefix" ]; then 23 | mkdir $output.prefix; 24 | fi && 25 | fastqreference=${FastqScreen_vars.conf}; 26 | references=(\${fastqreference//,/ }); 27 | for i in "\${!references[@]}"; do 28 | reference=(\${references[i]//::/ }); 29 | echo -e "DATABASE\t\${reference[0]}\t\${reference[1]}" >> $output.prefix/fastqscreen.conf; 30 | done; 31 | fastq_screen $FASTQSCREEN_FLAGS --conf $output.prefix/fastqscreen.conf --outdir $output.prefix $inputs; 32 | touch $outputs 33 | ""","FastqScreen" 34 | } 35 | 36 | forward inputs 37 | } 38 | 39 | -------------------------------------------------------------------------------- /modules/NGS/fastqscreen.header: -------------------------------------------------------------------------------- 1 | FastqScreen_vars=[ 2 | outdir : QC + "/fastqscreen", 3 | threads: Integer.toString(ESSENTIAL_THREADS), 4 | //conf defines your references: from this string the module writes a fastq_screen config file and then runs fastq_screen 5 | //(see the sketch after this module for an example value) 6 | conf : ESSENTIAL_FASTQSCREEN, 7 | //additional fastq_screen parameters, e.g. the subset size or bowtie/bowtie2 options 8 | extra : "--nohits --subset 100000" 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/NGS/fastqscreen.groovy" 12 |
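To make the loop above concrete: `conf` is a comma-separated list of `name::index` pairs, and each pair becomes one DATABASE line of the generated fastq_screen config. A hypothetical value and the file it produces:

    ESSENTIAL_FASTQSCREEN="human::/refs/bowtie2/GRCh38,mouse::/refs/bowtie2/GRCm38"  // paths are made up

    // resulting $output.prefix/fastqscreen.conf:
    // DATABASE	human	/refs/bowtie2/GRCh38
    // DATABASE	mouse	/refs/bowtie2/GRCm38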
13 | -------------------------------------------------------------------------------- /modules/NGS/filterchromosomes.groovy: -------------------------------------------------------------------------------- 1 | FilterChr = { 2 | doc title: "FilterChr", 3 | desc: "When mapping to the full genome, including unassembled contigs, remove those extra contigs before further analysis. The goal is to increase speed and decrease disk space usage. Source: https://www.biostars.org/p/171791/#171819", 4 | constraints: "Requires a file with the list of chromosomes to keep.", 5 | bpipe_version: "tested with bpipe 0.9.9.3.slurm", 6 | author: "António Domingues" 7 | 8 | output.dir=FilterChr_vars.outdir 9 | 10 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".bam") to (".chrOnly.bam") { 14 | exec """ 15 | ${TOOL_ENV} && 16 | ${PREAMBLE} && 17 | 18 | chroms=\$(cut -f1 ${FilterChr_vars.file}) && 19 | samtools view -@ ${FilterChr_vars.threads} -b $input \$chroms > $output && 20 | samtools index $output 21 | ""","FilterChr" 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /modules/NGS/filterchromosomes.header: -------------------------------------------------------------------------------- 1 | FilterChr_vars=[ 2 | outdir : MAPPED, 3 | threads: Integer.toString(ESSENTIAL_THREADS), 4 | file : ESSENTIAL_PROJECT + "/chromosomes2keep.txt" 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/NGS/filterchromosomes.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/NGS/insertsize.groovy: -------------------------------------------------------------------------------- 1 | InsertSize = { 2 | doc title: "InsertSize", 3 | desc: "Call picard tools to collect insert size metrics", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | var subdir : "" 9 | output.dir = InsertSize_vars.outdir + "/$subdir" 10 | 11 | def INSERTSIZE_FLAGS = 12 | (InsertSize_vars.extra ? " " + InsertSize_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) + " && " + 15 | prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && "+ 16 | prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 18 | 19 | transform(".bam") to ("_insertsizemetrics.tsv") { 20 | exec """ 21 | ${TOOL_ENV} && 22 | ${PREAMBLE} && 23 | 24 | java ${InsertSize_vars.java_flags} -jar \${PICARD} CollectInsertSizeMetrics $INSERTSIZE_FLAGS INPUT=$input OUTPUT=$output HISTOGRAM_FILE=${output.prefix}_hist.pdf 25 | ""","InsertSize" 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /modules/NGS/insertsize.header: -------------------------------------------------------------------------------- 1 | InsertSize_vars=[ 2 | outdir : QC + "/insertsize", //location of the OUTPUT Dir 3 | java_flags: "-Xmx5000m", //set the java heap size 4 | extra : "ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT" //sometimes the sorted flag is not set, and we should tolerate reads that overhang chromosome ends 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/NGS/insertsize.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/NGS/markdups.groovy: -------------------------------------------------------------------------------- 1 | MarkDups = { 2 | doc title: "MarkDups", 3 | desc: "Call picard tools to mark, and optionally remove, duplicated reads in a bam file", 4 | constraints: "Picard tools version >= 1.141. Expects an env var called `picard` with the path to picard's jar", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir = MarkDups_vars.outdir 9 | def MARKDUPS_FLAGS = 10 | " REMOVE_DUPLICATES=" + (MarkDups_vars.remove_dups ? "TRUE" : "FALSE") + 11 | " ASSUME_SORTED=" + (MarkDups_vars.assume_sorted ? "TRUE" : "FALSE") + 12 | (MarkDups_vars.extra ? " " + MarkDups_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 15 | prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | transform(".bam") to (".dupmarked.bam") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | java ${MarkDups_vars.java_flags} -jar \${PICARD} MarkDuplicates $MARKDUPS_FLAGS INPUT=$input OUTPUT=$output METRICS_FILE=${input.prefix}_dupmarked_dupmetrics.tsv 24 | ""","MarkDups" 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- /modules/NGS/markdups.header: -------------------------------------------------------------------------------- 1 | MarkDups_vars=[ 2 | outdir : MAPPED, 3 | java_flags : "-Xmx5000m", 4 | remove_dups : false, 5 | assume_sorted: true, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/NGS/markdups.groovy" 10 | 11 | // expected parameter types 12 | class MarkDups_vars_schema { 13 | String outdir 14 | String java_flags 15 | Boolean remove_dups 16 | Boolean assume_sorted 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir 22 | } 23 | } 24 | 25 | validate_schema(MarkDups_vars_schema, MarkDups_vars)
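With the defaults above, `MARKDUPS_FLAGS` expands to ` REMOVE_DUPLICATES=FALSE ASSUME_SORTED=TRUE`, i.e. duplicates are flagged in the output BAM but kept. A sketch of a per-project override, placed in the pipeline file after loading this header:

    load PIPELINE_ROOT + "/modules/NGS/markdups.header"
    MarkDups_vars.remove_dups = true   // now expands to REMOVE_DUPLICATES=TRUE: duplicates are dropped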
26 | -------------------------------------------------------------------------------- /modules/NGS/markdups2.groovy: -------------------------------------------------------------------------------- 1 | MarkDups2 = { 2 | doc title: "MarkDups2", 3 | desc: "Call the bamUtil dedup tool to mark, and optionally remove, duplicated reads in a bam file", 4 | constraints: "bamUtil tool version >= 1.0.13", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Giuseppe Petrosino" 7 | 8 | output.dir=MarkDups2_vars.outdir 9 | 10 | def TOOL_ENV = prepare_tool_env("bamutil", tools["bamutil"]["version"], tools["bamutil"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".bam") to (".dupmarked.bam") { 14 | exec """ 15 | ${TOOL_ENV} && 16 | ${PREAMBLE} && 17 | 18 | bam dedup --in $input --out $output --log ${input.prefix}_dupmetrics.log --noPhoneHome 19 | ""","MarkDups2" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /modules/NGS/markdups2.header: -------------------------------------------------------------------------------- 1 | MarkDups2_vars=[ 2 | outdir: MAPPED 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/NGS/markdups2.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/NGS/mergebam.groovy: -------------------------------------------------------------------------------- 1 | MergeBam = { 2 | doc title: "Merge bam files", 3 | desc: "Merges bam files following a pipeline-defined pattern", 4 | constraints: "Unless modified, the name for the merged replicates is determined by removing the pattern _rep[1-9] from the name of the first input. Change the code below if your samples follow a different naming pattern.", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Antonio Domingues" 7 | 8 | output.dir = MergeBam_vars.outdir 9 | def EXP = input1.split("/")[-1].replaceAll(".bam", "").replaceAll("_rep\\d+", "") 10 | 11 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 12 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 13 | 14 | // run the chunk 15 | produce(EXP + ".merged.bam") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | echo $inputs && 21 | samtools merge $output $inputs 22 | ""","MergeBam" 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /modules/NGS/mergebam.header: -------------------------------------------------------------------------------- 1 | MergeBam_vars=[ 2 | outdir: MAPPED + "/merged" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/NGS/mergebam.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/NGS/multiqc.groovy: -------------------------------------------------------------------------------- 1 | MultiQC = { 2 | doc title: "MultiQC", 3 | desc: "MultiQC is a reporting tool that parses summary statistics from results and log files generated by other bioinformatics tools", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.7", 6 | author: "Giuseppe Petrosino" 7 | 8 | output.dir = MultiQC_vars.outdir 9 | def MultiQC_FLAGS = 10 | (MultiQC_vars.extra ? " " + MultiQC_vars.extra : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("multiqc", tools["multiqc"]["version"], tools["multiqc"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | produce("multiqc_report.html") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | multiqc $ESSENTIAL_PROJECT $MultiQC_FLAGS -o $output.dir 21 | ""","MULTIQC" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /modules/NGS/multiqc.header: -------------------------------------------------------------------------------- 1 | MultiQC_vars=[ 2 | outdir: QC + "/multiqc", 3 | extra : "--ignore .bpipe/" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/multiqc.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/rmdups.groovy: -------------------------------------------------------------------------------- 1 | RmDups = { 2 | doc title: "RmDups", 3 | desc: "Call picard tools to mark, and optionally remove, duplicated reads in a bam file", 4 | constraints: "Picard tools version >= 1.141. Expects an env var called `picard` with the path to picard's jar", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir = RmDups_vars.outdir 9 | def RMDUPS_FLAGS = 10 | " REMOVE_DUPLICATES=" + (RmDups_vars.remove_dups ? "TRUE" : "FALSE") + 11 | " ASSUME_SORTED=" + (RmDups_vars.assume_sorted ? "TRUE" : "FALSE") + 12 | (RmDups_vars.extra ?
" " + RmDups_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 15 | prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | transform(".bam") to (".duprm.bam") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | java ${RmDups_vars.java_flags} -jar \${PICARD} MarkDuplicates $RMDUPS_FLAGS INPUT=$input OUTPUT=$output METRICS_FILE=${input.prefix}_duprm_dupmetrics.tsv TMP_DIR=\${TMP} 24 | ""","RmDups" 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- /modules/NGS/rmdups.header: -------------------------------------------------------------------------------- 1 | RmDups_vars=[ 2 | outdir : MAPPED, 3 | java_flags : "-Xmx5000m", 4 | remove_dups : true, 5 | assume_sorted: true, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/NGS/rmdups.groovy" 10 | 11 | // expected parameter types 12 | class RmDups_vars_schema { 13 | String outdir 14 | String java_flags 15 | Boolean remove_dups 16 | Boolean assume_sorted 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir 22 | } 23 | } 24 | 25 | validate_schema(RmDups_vars_schema, RmDups_vars) 26 | -------------------------------------------------------------------------------- /modules/NGS/samtoolscov.groovy: -------------------------------------------------------------------------------- 1 | samtoolscov = { 2 | doc title: "samtoolscov", 3 | desc: "Call samtools to generate coverage statistics", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Frank Rühle" 7 | 8 | output.dir = samtoolscov_vars.outdir 9 | def SAMTOOLSCOV_FLAGS = 10 | (samtoolscov_vars.extra ? 
" " + samtoolscov_vars.extra : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | transform(".bam\$") to(".coverage.txt") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | samtools coverage $SAMTOOLSCOV_FLAGS -o $output $input 21 | ""","samtoolscov" 22 | } 23 | 24 | forward input 25 | } 26 | 27 | -------------------------------------------------------------------------------- /modules/NGS/samtoolscov.header: -------------------------------------------------------------------------------- 1 | samtoolscov_vars=[ 2 | outdir: QC + "/samtoolscov", 3 | extra : " " 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/samtoolscov.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/strandSpecificBW.header: -------------------------------------------------------------------------------- 1 | strandBigWig_vars=[ 2 | outdir : TRACKS + "/strandspecific", 3 | threads : Integer.toString(ESSENTIAL_THREADS), 4 | stranded : ESSENTIAL_STRANDED, 5 | binSize : "10", 6 | normalizeUsing : "CPM", 7 | skipNonCoveredRegions: true, 8 | outFileFormat : "bedgraph", 9 | extra : "" 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/NGS/strandSpecificBW.groovy" 13 | 14 | // expected parameter types 15 | class strandBigWig_vars_schema { 16 | String outdir 17 | String threads 18 | String stranded 19 | String binSize 20 | String normalizeUsing 21 | Boolean skipNonCoveredRegions 22 | String outFileFormat 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && stranded 28 | } 29 | } 30 | 31 | validate_schema(strandBigWig_vars_schema, strandBigWig_vars) 32 | -------------------------------------------------------------------------------- /modules/NGS/trackhub.groovy: -------------------------------------------------------------------------------- 1 | trackhub = { 2 | doc title: "Trackhub", 3 | desc: "Generate UCSC track hub to display project tracks", 4 | constraints: "Uses configuration file, which should have been generated earlier", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Martin Oti" 7 | 8 | output.dir = trackhub_vars.tracksdir // location of "trackhub.done" file 9 | def TRACKHUB_FLAGS = 10 | (trackhub_vars.config ? 
"TRACKHUB_CONFIG=" + trackhub_vars.config : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) + " && " + 13 | prepare_tool_env("kentutils", tools["kentutils"]["version"], tools["kentutils"]["runenv"]) 14 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 15 | 16 | transform(".yaml") to (".done") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | Rscript ${PIPELINE_ROOT}/tools/trackhub/Make_Trackhub.R $TRACKHUB_FLAGS 22 | ""","trackhub" 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /modules/NGS/trackhub.header: -------------------------------------------------------------------------------- 1 | trackhub_vars=[ 2 | tracksdir : TRACKS, // folder with tracks & trackhub configuration file 3 | config: TRACKS + "/trackhub.yaml" // trackhub configuration file 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/trackhub.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/trackhub_config.header: -------------------------------------------------------------------------------- 1 | trackhub_config_vars=[ 2 | ftpurlbase: "https://hpc1.imb.uni-mainz.de/public",// public FTP base URL 3 | ftpbase : "/fsimb/services/ftp/public/", // public FTP root folder 4 | ucsccfg : "/fsimb/common/tools/ucsc/config/", // folder with UCSC tools configuration files (e.g. narrowPeak.as/broadPeak.as) 5 | targets : PROJECT + "/targets.txt", // targets file describing the samples for ChIP-seq 6 | peaksdir : RESULTS + "/macs2", // location of peak files for ChIP-seq (comment out if no peak files) 7 | tracksdir : TRACKS, // location of track files for putting into trackhub 8 | config : TRACKS + "/trackhub.yaml", // trackhub configuration file 9 | project : PROJECT, 10 | db : ESSENTIAL_DB, // UCSC genome assembly, e.g. "hg19 11 | chromsizes: ESSENTIAL_CHROMSIZES, // chromosome sizes file 12 | stranded : ESSENTIAL_STRANDED, // stranded sequencing or not, for strans-specific bigwig creation 13 | tracks : TRACKS // full path of project subdirectory containing tracks 14 | ] 15 | 16 | load PIPELINE_ROOT + "/modules/NGS/trackhub_config.groovy" 17 | 18 | -------------------------------------------------------------------------------- /modules/RNAseq/GO_Enrichment.groovy: -------------------------------------------------------------------------------- 1 | GO_Enrichment = { 2 | doc title: "GO_Enrichment", 3 | desc: "Gene Ontology enrichment analysis", 4 | constraints: "", 5 | bpipe_version: "", 6 | author: "" 7 | 8 | output.dir = GO_Enrichment_vars.outdir 9 | def GO_Enrichment_FLAGS = 10 | (GO_Enrichment_vars.log2fold ? " log2Fold=" + GO_Enrichment_vars.log2fold : "" ) + 11 | (GO_Enrichment_vars.padj ? " padj=" + GO_Enrichment_vars.padj : "" ) + 12 | (GO_Enrichment_vars.org ? " organism=" + GO_Enrichment_vars.org : "" ) + 13 | (GO_Enrichment_vars.univ ? " univ=" + GO_Enrichment_vars.univ : "" ) + 14 | (GO_Enrichment_vars.type ? " type=" + GO_Enrichment_vars.type : "" ) + 15 | (GO_Enrichment_vars.category ? " plotCategory=" + GO_Enrichment_vars.category : "" ) + 16 | (GO_Enrichment_vars.outdir ? " out=" + GO_Enrichment_vars.outdir : "" ) + 17 | (GO_Enrichment_vars.cores ? " cores=" + GO_Enrichment_vars.cores : "" ) + 18 | (GO_Enrichment_vars.extra ? 
" " + GO_Enrichment_vars.extra : "" ) 19 | 20 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 21 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 22 | 23 | transform(".RData") to("_GO.done") { 24 | exec """ 25 | ${TOOL_ENV} && 26 | ${PREAMBLE} && 27 | 28 | touch $output && 29 | Rscript ${PIPELINE_ROOT}/tools/GO_Enrichment/GO_Enrichment.R rData=$input $GO_Enrichment_FLAGS && 30 | if [ \$? -ne 0 ]; then rm $output; fi; 31 | ""","GO_Enrichment" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /modules/RNAseq/GO_Enrichment.header: -------------------------------------------------------------------------------- 1 | GO_Enrichment_vars=[ 2 | rdata : DE_DESeq2_vars.outdir, 3 | log2fold: Double.toString(Math.log(ESSENTIAL_DESEQ2_FC)/Math.log(2)), 4 | padj : ESSENTIAL_DESEQ2_FDR, 5 | org : ESSENTIAL_ORG, 6 | univ : "expressed", 7 | type : "gene_name", 8 | category: "20", 9 | outdir : RESULTS + "/GO_Analysis", 10 | cores : Integer.toString(ESSENTIAL_THREADS), 11 | extra : "" 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/RNAseq/GO_Enrichment.groovy" 15 | 16 | // expected parameter types 17 | class GO_Enrichment_vars_schema { 18 | String rdata 19 | String log2fold 20 | BigDecimal padj 21 | String org 22 | String univ 23 | String type 24 | String category 25 | String outdir 26 | String cores 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | outdir && padj >= 0 && padj <= 1 32 | } 33 | } 34 | 35 | validate_schema(GO_Enrichment_vars_schema, GO_Enrichment_vars) 36 | -------------------------------------------------------------------------------- /modules/RNAseq/dupradar.header: -------------------------------------------------------------------------------- 1 | dupRadar_vars=[ 2 | outdir : QC + "/dupRadar", //output dir. 
If you change it here, change it in the module file also 3 | stranded: ESSENTIAL_STRANDED, // strandness 4 | paired : ESSENTIAL_PAIRED, // is a paired end experiment 5 | threads : Integer.toString(ESSENTIAL_THREADS), // number of threads to be used 6 | gtf : ESSENTIAL_GENESGTF, // gene model 7 | extra : "", // extra parms sent to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/RNAseq/dupradar.groovy" 11 | 12 | -------------------------------------------------------------------------------- /modules/RNAseq/filter2htseq.groovy: -------------------------------------------------------------------------------- 1 | filter2htseq = { 2 | doc title: "filter_featureCounts_to_htseq", 3 | desc: "filter featureCount output to fit HTSeq format, extract column 1 and 7 as well as skipping the header", 4 | constraints: "none.", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Oliver Drechsel" 7 | 8 | output.dir = filter2htseq_vars.outdir 9 | 10 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 11 | 12 | transform(".raw_readcounts.tsv") to (".readcounts.tsv") { 13 | exec """ 14 | ${PREAMBLE} && 15 | 16 | tail -n +3 $input | awk '{print \$1\"\\t\"\$7}' > $output 17 | """ 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /modules/RNAseq/filter2htseq.header: -------------------------------------------------------------------------------- 1 | filter2htseq_vars=[ 2 | outdir: subread_count_vars.outdir 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/RNAseq/filter2htseq.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/RNAseq/genebodycov.groovy: -------------------------------------------------------------------------------- 1 | geneBodyCov = { 2 | doc title: "geneBodyCoverage", 3 | desc: "Calculate the RNA-seq coverage over gene body. Useful to check the 5' or 3' coverage bias", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir = geneBodyCov_vars.outdir 9 | def GENEBODYCOV_FLAGS = 10 | (geneBodyCov_vars.format ? " -f " + geneBodyCov_vars.format : "" ) + 11 | (geneBodyCov_vars.bed ? " -r " + geneBodyCov_vars.bed : "" ) + 12 | (geneBodyCov_vars.extra ? 
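To make the one-liner concrete: `tail -n +3` drops featureCounts' comment line and column header, and the awk keeps columns 1 (Geneid) and 7 (the counts of the single BAM). Hypothetical input and output:

    // *.raw_readcounts.tsv (featureCounts layout):
    //   # Program:featureCounts ... ; Command: ...
    //   Geneid  Chr  Start  End    Strand  Length  sample1.bam
    //   GeneA   1    11869  14409  +       1735    42
    // *.readcounts.tsv (HTSeq-like two-column counts):
    //   GeneA   42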
" " + geneBodyCov_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("rseqc", tools["rseqc"]["version"], tools["rseqc"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | // run the chunk 18 | transform(".bam") to (".geneBodyCoverage.curves.png", ".geneBodyCoverage.r", ".geneBodyCoverage.txt") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | geneBody_coverage.py -i $input -o ${output3.prefix.prefix} $GENEBODYCOV_FLAGS 24 | ""","geneBodyCov" 25 | } 26 | forward input 27 | } 28 | -------------------------------------------------------------------------------- /modules/RNAseq/genebodycov.header: -------------------------------------------------------------------------------- 1 | geneBodyCov_vars=[ 2 | outdir: QC + "/geneBodyCov", 3 | bed : ESSENTIAL_GENESBED, // gene model 4 | format: "png", // image format 5 | extra : "" // extra parms to sent to the tool 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/genebodycov.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/RNAseq/genebodycov2.groovy: -------------------------------------------------------------------------------- 1 | geneBodyCov2 = { 2 | doc title: "geneBodyCoverage2", 3 | desc: """Calculate the RNA-seq coverage over gene body. 4 | Useful to check the 5' or 3' coverage bias""", 5 | constraints: "", 6 | bpipe_version: "tested with bpipe 0.9.9.9", 7 | author: "Sergi Sayols" 8 | 9 | output.dir = geneBodyCov2_vars.outdir 10 | def GENEBODYCOV2_FLAGS = 11 | (geneBodyCov2_vars.gtf ? " gtf=" + geneBodyCov2_vars.gtf : "" ) + 12 | (geneBodyCov2_vars.paired ? " paired=" + geneBodyCov2_vars.paired : "" ) + 13 | (geneBodyCov2_vars.stranded ? " stranded=" + geneBodyCov2_vars.stranded : "" ) + 14 | (geneBodyCov2_vars.outdir ? " outdir=" + geneBodyCov2_vars.outdir : "" ) + 15 | (geneBodyCov2_vars.threads ? " threads=" + geneBodyCov2_vars.threads : "" ) 16 | 17 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 18 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 19 | 20 | // run the chunk 21 | transform(".bam") to ("_geneBodyCov.png") { 22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | if [[ ! 
-e "$output.dir" ]]; then 27 | mkdir -p "$output.dir"; 28 | fi && 29 | 30 | Rscript ${PIPELINE_ROOT}/tools/geneBodyCov/geneBodyCov.R bam=$input $GENEBODYCOV2_FLAGS 31 | ""","geneBodyCov2" 32 | } 33 | forward input 34 | } 35 | -------------------------------------------------------------------------------- /modules/RNAseq/genebodycov2.header: -------------------------------------------------------------------------------- 1 | geneBodyCov2_vars=[ 2 | gtf : ESSENTIAL_GENESGTF, // the gencode annotation GTF (can be compressed) 3 | paired : ESSENTIAL_PAIRED, // paired end yes|no 4 | stranded: ESSENTIAL_STRANDED, // strandness yes|no|reverse 5 | outdir : QC + "/geneBodyCov", 6 | threads : Integer.toString(ESSENTIAL_THREADS) // number of cores to use 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/RNAseq/genebodycov2.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/RNAseq/inferexperiment.groovy: -------------------------------------------------------------------------------- 1 | inferexperiment = { 2 | doc title: "inferexperiment", 3 | desc: "Calculate the strand-specificity of the library", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = inferexperiment_vars.outdir 9 | 10 | def INFEREXPERIMENT_FLAGS = 11 | (inferexperiment_vars.bed ? " -r " + inferexperiment_vars.bed : "" ) + 12 | (inferexperiment_vars.extra ? " " + inferexperiment_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("rseqc", tools["rseqc"]["version"], tools["rseqc"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | // run the chunk 18 | transform(".bam") to (input.prefix + "_inferexperiment.txt") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | infer_experiment.py -i $input $INFEREXPERIMENT_FLAGS > $output 24 | ""","inferexperiment" 25 | } 26 | 27 | forward input 28 | } 29 | 30 | -------------------------------------------------------------------------------- /modules/RNAseq/inferexperiment.header: -------------------------------------------------------------------------------- 1 | inferexperiment_vars=[ 2 | outdir: QC + "/inferexperiment", 3 | bed : ESSENTIAL_GENESBED, //this variable is essential for the module to run do not set it to the empty string! 
4 | extra : "-s 4000000" //other options can go here; in this case it is the sample size (how many reads should be sampled from the bam file) 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/RNAseq/inferexperiment.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/RNAseq/prermats.header: -------------------------------------------------------------------------------- 1 | PRERMATS_vars=[ 2 | outdir : RESULTS + "/PRERMATS", 3 | targets : "targets.txt", 4 | contrasts: "contrasts.txt", 5 | suffix : "_targets_rMATS.txt" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/prermats.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/RNAseq/qualimap.groovy: -------------------------------------------------------------------------------- 1 | qualimap = { 2 | doc title: "Qualimap", 3 | desc: "Call qualimap to run RNA-seq quality control", 4 | author: "Nastasja Kreim" 5 | 6 | output.dir = qualimap_vars.outdir 7 | // no|yes|reverse 8 | if(qualimap_vars.stranded == "no") { 9 | qualimap_vars.protocol = "non-strand-specific" 10 | } 11 | else if (qualimap_vars.stranded == "yes") { 12 | qualimap_vars.protocol = "strand-specific-forward" 13 | } 14 | else { 15 | qualimap_vars.protocol = "strand-specific-reverse" 16 | } 17 | if(qualimap_vars.paired){ 18 | qualimap_vars.extra = qualimap_vars.extra + " -pe" 19 | } 20 | 21 | def TOOL_ENV = prepare_tool_env("qualimap", tools["qualimap"]["version"], tools["qualimap"]["runenv"]) 22 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 23 | 24 | transform(".bam") to("_counts.txt") { 25 | exec """ 26 | ${TOOL_ENV} && 27 | ${PREAMBLE} && 28 | 29 | unset DISPLAY; 30 | echo $output.prefix; 31 | qualimap rnaseq -bam $input -outdir ${output.prefix}_qualimap -outformat html -gtf ${qualimap_vars.genesgtf} -oc $output -p ${qualimap_vars.protocol} ${qualimap_vars.extra} 32 | ""","qualimap" 33 | } 34 | 35 | forward input 36 | } 37 | 38 | -------------------------------------------------------------------------------- /modules/RNAseq/qualimap.header: -------------------------------------------------------------------------------- 1 | qualimap_vars=[ 2 | outdir : QC + "/qualimap", 3 | stranded: ESSENTIAL_STRANDED, // options are no/yes/reverse 4 | genesgtf: ESSENTIAL_GENESGTF, 5 | paired : (ESSENTIAL_PAIRED == "yes"), 6 | extra : "--java-mem-size=10G" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/RNAseq/qualimap.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/RNAseq/rmats.header: -------------------------------------------------------------------------------- 1 | rMATS_vars=[ 2 | outdir : RESULTS + "/rMATS", 3 | suffix : PRERMATS_vars.suffix, 4 | sep : "-", 5 | paired : (ESSENTIAL_PAIRED == "yes"), 6 | stranded : ESSENTIAL_STRANDED, 7 | gtf : ESSENTIAL_GENESGTF, 8 | length : ESSENTIAL_READLENGTH, 9 | threads : Integer.toString(ESSENTIAL_THREADS), 10 | varreadlen: true, // allow reads with lengths that differ from ESSENTIAL_READLENGTH 11 | allowclip : true, // allow alignments with soft or hard clipping to be used 12 | novelss : true, // enable detection of novel (unannotated) splice sites 13 | extra : " --cstat 0.0001" // set e.g. --paired-stats if samples are paired and a paired-stats model should be used 14 | ] 15 | 16 | maser_vars=[ 17 | gtf : ESSENTIAL_GENESGTF, // Needs to be an ensembl annotation for the plots to work correctly 18 | db : ESSENTIAL_DB, 19 | ftype : "JCEC", // tells which type of splicing events to consider: junction counts (JC) or junction-exon counts (JCEC) 20 | mincov : "5", // ignore splicing events with read coverage below this count 21 | fdr : "0.01", // FDR cut-off to select statistically significant splicing events identified by rMATS 22 | dpsi : "0.1" // minimum percentage spliced in (PSI) to include in plots 23 | ] 24 | 25 | load PIPELINE_ROOT + "/modules/RNAseq/rmats.groovy" 26 | 27 | -------------------------------------------------------------------------------- /modules/RNAseq/rnaseqc.groovy: -------------------------------------------------------------------------------- 1 | rnaseqc = { 2 | doc title: "RNA-SeQC: Basic quality control for RNA-seq", 3 | desc: "efficient RNA-seq quality control and quantification for large cohorts", 4 | constraints: "", 5 | author: "Sivarajan Karunanithi" 6 | 7 | output.dir = rnaseqc_vars.outdir 8 | def RNASEQC_FLAGS = 9 | (rnaseqc_vars.legacy ? " --legacy" : "") + 10 | (rnaseqc_vars.extra ? " " + rnaseqc_vars.extra : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("rnaseqc", tools["rnaseqc"]["version"], tools["rnaseqc"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage: stageName, outdir: output.dir, input: new File(input1.prefix).getName()) 14 | 15 | //run the chunk 16 | transform(".bam") to (".bam.gene_reads.gct") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | rnaseqc ${rnaseqc_vars.gtf} $input $output.dir ${RNASEQC_FLAGS}; 22 | ""","rnaseqc" 23 | 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /modules/RNAseq/rnaseqc.header: -------------------------------------------------------------------------------- 1 | rnaseqc_vars=[ 2 | outdir: RESULTS + "/RNA-SeQC", 3 | legacy: "--legacy", 4 | gtf : ESSENTIAL_GENESGTF, 5 | extra : "-u -v" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/rnaseqc.groovy" 9 | -------------------------------------------------------------------------------- /modules/RNAseq/starfusion.groovy: -------------------------------------------------------------------------------- 1 | STAR_Fusion = { 2 | doc title: "STAR-Fusion", 3 | desc: "detection of fusion transcripts from RNA-Seq data", 4 | constraints: "tab-delimited summary file identifying the fusion pairs. Works only with PE data", 5 | bpipe_version: "tested with bpipe 0.9.9", 6 | author: "Giuseppe Petrosino" 7 | 8 | output.dir = STAR_Fusion_vars.outdir 9 | 10 | File f = new File(input1) 11 | def OUTPUTFILE = (f.getName() =~ /.R1.fastq.gz/).replaceFirst("") 12 | 13 | def STARFUSION_FLAGS = 14 | (STAR_Fusion_vars.threads ? " --CPU " + STAR_Fusion_vars.threads : "") + 15 | (STAR_Fusion_vars.genome_lib ? " --genome_lib_dir " + STAR_Fusion_vars.genome_lib : "") + 16 | (STAR_Fusion_vars.extra ? " " + STAR_Fusion_vars.extra : "") 17 | 18 | def TOOL_ENV = prepare_tool_env("starfusion", tools["starfusion"]["version"], tools["starfusion"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | produce(OUTPUTFILE + "_starfusion.done") { // change it to whatever STAR-Fusion produces, and remove the touch $output, it's useless!
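// Sketch answering the note above (not verified against the STAR-Fusion version
// pinned in config/tools.groovy): STAR-Fusion writes
// star-fusion.fusion_predictions.tsv (plus an .abridged.tsv) into --output_dir,
// so the stage could produce() that file directly, e.g.
// produce(OUTPUTFILE + "_starfusion/star-fusion.fusion_predictions.tsv"),
// which would make the .done marker and the trailing touch unnecessary.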
22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | STAR-Fusion $STARFUSION_FLAGS --tmpdir \${TMP}/\$(basename $output.prefix) --left_fq $input1 --right_fq $input2 --output_dir $output.prefix && 27 | touch $output 28 | ""","STAR_Fusion" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /modules/RNAseq/starfusion.header: -------------------------------------------------------------------------------- 1 | STAR_Fusion_vars=[ 2 | outdir : FUSION, 3 | threads : Integer.toString(ESSENTIAL_THREADS), 4 | genome_lib : ESSENTIAL_STARFUSION_LIB, 5 | extra : "" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/starfusion.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/RNAseq/stringtie.header: -------------------------------------------------------------------------------- 1 | StringTie_vars=[ 2 | outdir : RESULTS + "/stringtie", 3 | gtf : ESSENTIAL_GENESGTF, 4 | stranded: ESSENTIAL_STRANDED, 5 | threads : Integer.toString(ESSENTIAL_THREADS), 6 | extra : "-f 0.1 -B -e" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/RNAseq/stringtie.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/RNAseq/subread.header: -------------------------------------------------------------------------------- 1 | subread_count_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | stranded: ESSENTIAL_STRANDED, //whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | paired : (ESSENTIAL_PAIRED == "yes"), //paired end design 5 | feature : "exon", //annotation feature to count mapped reads ("exon" by default) 6 | genesgtf: ESSENTIAL_GENESGTF, 7 | threads : Integer.toString(ESSENTIAL_THREADS), 8 | extra : "" //extra params to send to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/RNAseq/subread.groovy" 12 | 13 | -------------------------------------------------------------------------------- /modules/RNAseq/subread2rnatypes.header: -------------------------------------------------------------------------------- 1 | subread2rnatypes_vars=[ 2 | outdir : QC + "/RNAtypes", 3 | stranded : ESSENTIAL_STRANDED, //whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | paired : (ESSENTIAL_PAIRED == "yes"), //paired end design 5 | genesgtf : ESSENTIAL_GENESGTF, 6 | feature : "exon", // type of feature that is to be counted in 7 | accumulate: ESSENTIAL_FEATURETYPE, // type of annotation counts should be accumulated on. Usually that would be gene_id, but in this case we choose gene_biotype 8 | threads : Integer.toString(ESSENTIAL_THREADS), 9 | extra : "" // extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/RNAseq/subread2rnatypes.groovy" 13 | 14 | -------------------------------------------------------------------------------- /modules/RNAseq/tpm.groovy: -------------------------------------------------------------------------------- 1 | tpm = { 2 | doc title: "tpm", 3 | desc: "Calculate TPMs based on raw counts", 4 | constraints: "", 5 | bpipe_version: "", 6 | author: "Anke Busch" 7 | 8 | output.dir = tpm_vars.outdir 9 | def TPM_FLAGS = 10 | (tpm_vars.genesgtf ? " -g " + tpm_vars.genesgtf : "") + 11 | (tpm_vars.feature ? " -f " + tpm_vars.feature : "") + 12 | (tpm_vars.extra ? " " + tpm_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | // run the chunk 18 | transform(".readcounts.tsv") to (".tpm.tsv") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | Rscript ${PIPELINE_ROOT}/tools/TPMs/TPMs.R -c $input -o $output $TPM_FLAGS 24 | 25 | ""","tpm" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /modules/RNAseq/tpm.header: -------------------------------------------------------------------------------- 1 | tpm_vars=[ 2 | outdir : RESULTS + "/TPMs", 3 | genesgtf: ESSENTIAL_GENESGTF, 4 | feature : "exon", // considered features in genes 5 | extra : "" // extra params to send to the tool 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/tpm.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/add_read_group.groovy: -------------------------------------------------------------------------------- 1 | AddRG = { 2 | doc title: "AddReadGroup", 3 | desc: "Adds read groups to the bam as part of the GATK pipeline", 4 | constraints: "Picard tools version >= 1.141", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = AddRG_vars.outdir 8 | 9 | File f = new File(input1) 10 | def EXP = (f.getName() =~ /.bam/).replaceFirst("") 11 | 12 | def TOOL_ENV = prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | transform(".bam") to (".rg.bam"){ 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | java ${AddRG_vars.java_flags} -jar \${PICARD} AddOrReplaceReadGroups I=$input O=$output SO=coordinate RGID=${EXP} RGLB=${EXP} RGPL=illumina RGPU=genomics RGSM=${EXP} 21 | ""","AddRG" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/add_read_group.header: -------------------------------------------------------------------------------- 1 | AddRG_vars=[ 2 | outdir : STAR_pe_2nd_vars.outdir, 3 | java_flags: "-Xmx5000m" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/add_read_group.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/base_recalibration.groovy: -------------------------------------------------------------------------------- 1 | BaseRecalibration = { 2 | doc title: "GATK BaseRecalibrator", 3 | desc: "Recalibrate Base Qualities in BAM files, using GATK.", 4 | constraints: "GATK version >= 3.5", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = BaseRecalibration_vars.outdir 8 | 9 | def BaseRecalibrator_FLAGS = 10 | (BaseRecalibration_vars.vcf_ref ? " -knownSites " + BaseRecalibration_vars.vcf_ref : "" ) + 11 | (BaseRecalibration_vars.threads ? " -nct " + BaseRecalibration_vars.threads : "" ) + 12 | (BaseRecalibration_vars.genome_ref ? " -R " + BaseRecalibration_vars.genome_ref : "" ) 13 | 14 | def PrintReads_FLAGS = 15 | (BaseRecalibration_vars.threads ? " -nct " + BaseRecalibration_vars.threads : "" ) + 16 | (BaseRecalibration_vars.genome_ref ? " -R " + BaseRecalibration_vars.genome_ref : "" ) 17 | 18 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 19 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 20 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 21 | 22 | transform (".bam") to (".recalibration.table", ".recalibrated.bam"){ 23 | exec """ 24 | ${TOOL_ENV} && 25 | ${PREAMBLE} && 26 | 27 | java ${BaseRecalibration_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T BaseRecalibrator $BaseRecalibrator_FLAGS -I $input -o $output1 && 28 | java ${BaseRecalibration_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T PrintReads $PrintReads_FLAGS -I $input -BQSR $output1 -o $output2 29 | ""","BaseRecalibration" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/base_recalibration.header: -------------------------------------------------------------------------------- 1 | BaseRecalibration_vars=[ 2 | outdir : STAR_pe_2nd_vars.outdir, 3 | java_flags: "-Xmx2400m", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | genome_ref: ESSENTIAL_GENOME_REF, 6 | vcf_ref : ESSENTIAL_VCF_REF 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/base_recalibration.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/create_star_index_sjdb.header: -------------------------------------------------------------------------------- 1 | GenerateStarIndexFromSJ_vars=[ 2 | outdir : MAPPED + "/sjdbStarIndex", 3 | sjdbfile : MAPPED + "/sjdbStarIndex/SJ.out.tab.Pass1.sjdb", 4 | outdir_2nd_index: FilterAndMergeSJtab_vars.outdir, 5 | threads : Integer.toString(ESSENTIAL_THREADS), 6 | genome_ref: ESSENTIAL_GENOME_REF, 7 | maxram : STAR_pe_vars.maxram, 8 | bufsize : STAR_pe_vars.bufsize, 9 | overhang : STAR_pe_vars.overhang, 10 | extra : "" 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/create_star_index_sjdb.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/mark_dups.groovy: -------------------------------------------------------------------------------- 1 | MarkDups = { 2 | doc title: "MarkDups", 3 | desc: "Call picard tools to mark, and optionally remove, duplicated reads in a bam file", 4 | constraints: "Picard tools version >= 1.141", 5 | author: "Sergi Sayols, modified by Antonio Domingues" 6 | 7 | output.dir = MarkDups_vars.outdir 8 | def MarkDups_FLAGS = 9 | " REMOVE_DUPLICATES=" + (MarkDups_vars.remove_dups ? "TRUE" : "FALSE") + 10 | " CREATE_INDEX=" + (MarkDups_vars.index ? "TRUE" : "FALSE") + 11 | " ASSUME_SORTED=" + (MarkDups_vars.assume_sorted ? "TRUE" : "FALSE") + 12 | (MarkDups_vars.validation ? " VALIDATION_STRINGENCY=" + MarkDups_vars.validation : "") + 13 | (MarkDups_vars.extra ? " " + MarkDups_vars.extra : "") 14 | 15 | def TOOL_ENV = prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | transform(".rg.bam") to (".rg.duprm.bam"){ 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | java ${MarkDups_vars.java_flags} -jar \${PICARD} MarkDuplicates $MarkDups_FLAGS INPUT=$input OUTPUT=$output METRICS_FILE=${input.prefix}_dupmetrics.tsv 24 | ""","MarkDups" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/mark_dups.header: -------------------------------------------------------------------------------- 1 | MarkDups_vars=[ 2 | outdir : MAPPED, 3 | java_flags : "-Xmx5000m", 4 | remove_dups : false, 5 | index : true, 6 | assume_sorted: true, 7 | validation : "SILENT", 8 | extra : "" 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/mark_dups.groovy" 12 | 13 | // expected parameter types 14 | class MarkDups_vars_schema { 15 | String outdir 16 | String java_flags 17 | Boolean remove_dups 18 | Boolean index 19 | Boolean assume_sorted 20 | String validation 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir 26 | } 27 | } 28 | 29 | validate_schema(MarkDups_vars_schema, MarkDups_vars) 30 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/merge_SJ_tab.groovy: -------------------------------------------------------------------------------- 1 | FilterAndMergeSJtab = { 2 | doc title: "FilterAndMergeSJtab", 3 | desc: "GATK variant calling suggests 2-step STAR mapping for RNA-seq. In this step all splice junction files are collected, filtered and merged. Based on https://code.google.com/p/rna-star/issues/detail?id=7", 4 | constraints: "STAR STAR_2.4.2a", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = FilterAndMergeSJtab_vars.outdir 8 | 9 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 10 | 11 | produce("SJ.out.tab.Pass1.sjdb"){ 12 | exec """ 13 | ${PREAMBLE} && 14 | 15 | cat $inputs | awk 'BEGIN {OFS="\t"; strChar[0]="."; strChar[1]="+"; strChar[2]="-";} {if(\$5>0){print \$1,\$2,\$3,strChar[\$4]}}' > $output 16 | ""","FilterAndMergeSJtab" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/merge_SJ_tab.header: -------------------------------------------------------------------------------- 1 | FilterAndMergeSJtab_vars=[ 2 | outdir: MAPPED + "/sjdbStarIndex" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/merge_SJ_tab.groovy" 6 |
" -R " + SplitNCigarReads_vars.gatk_ref : "") + 11 | (SplitNCigarReads_vars.read_filter_flag ? " -rf " + SplitNCigarReads_vars.read_filter_flag : "") + 12 | (SplitNCigarReads_vars.map_q_from_flag ? " -RMQF " + SplitNCigarReads_vars.map_q_from_flag : "") + 13 | (SplitNCigarReads_vars.map_q_to_flag ? " -RMQT " + SplitNCigarReads_vars.map_q_to_flag : "") + 14 | (SplitNCigarReads_vars.unsafe_flag ? " -U " + SplitNCigarReads_vars.unsafe_flag : "") 15 | 16 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 17 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 18 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 19 | 20 | transform (".duprm.bam") to (".duprm.split.bam"){ 21 | exec """ 22 | ${TOOL_ENV} && 23 | ${PREAMBLE} && 24 | 25 | java ${VariantCallHC_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T SplitNCigarReads $SplitNCigarReads_FLAGS -I $input -o $output 26 | ""","SplitNCigarReads" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/splitNcigar.header: -------------------------------------------------------------------------------- 1 | SplitNCigarReads_vars=[ 2 | outdir : STAR_pe_2nd_vars.outdir, 3 | gatk_ref : ESSENTIAL_GENOME_REF, 4 | gatk_threads : Integer.toString(ESSENTIAL_THREADS), 5 | java_flags : "2400m", 6 | read_filter_flag: "ReassignOneMappingQuality", 7 | map_q_from_flag : 255, 8 | map_q_to_flag : 60, 9 | unsafe_flag : "ALLOW_N_CIGAR_READS" 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/splitNcigar.groovy" 13 | 14 | // expected parameter types 15 | class SplitNCigarReads_vars_schema { 16 | String outdir 17 | String gatk_ref 18 | String gatk_threads 19 | String java_flags 20 | String read_filter_flag 21 | Integer map_q_from_flag 22 | Integer map_q_to_flag 23 | String unsafe_flag 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && gatk_ref && map_q_from_flag >= 0 && map_q_to_flag >= 0 28 | } 29 | } 30 | 31 | validate_schema(SplitNCigarReads_vars_schema, SplitNCigarReads_vars) 32 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/star1pass.header: -------------------------------------------------------------------------------- 1 | STAR_pe_vars=[ 2 | outdir : MAPPED + "/1stPass", 3 | logdir : LOGS + "/STAR_1stPass", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | ref : ESSENTIAL_STAR_REF, 6 | maxram : "31000000000", // around 30Gb for mammals 7 | bufsize : "150000000", // buffer size 8 | mm : "2", // number of mismatches allowed 9 | multimap : "10", // max multimap positions per read 10 | minintro : "21", // minimum intron size 11 | overhang : Integer.toString(ESSENTIAL_READLENGTH - 1), 12 | extra : "" 13 | ] 14 | 15 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/star1pass.groovy" 16 | 17 | // expected parameter types 18 | class STAR_pe_vars_schema { 19 | String outdir 20 | String logdir 21 | String threads 22 | String ref 23 | String maxram 24 | String bufsize 25 | String mm 26 | String multimap 27 | String minintro 28 | String overhang 29 | String extra 30 | 31 | // check for the presence of mandatory params 32 | boolean asBoolean() { 33 | outdir && ref 34 | } 35 | } 36 | 37 | validate_schema(STAR_pe_vars_schema, STAR_pe_vars) 38 | -------------------------------------------------------------------------------- 
/modules/RNAseqVariantCalling/star2pass.header: -------------------------------------------------------------------------------- 1 | STAR_pe_2nd_vars=[ 2 | outdir : MAPPED + "/2ndPass", 3 | logdir : LOGS + "/STAR_2ndPass", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | ref : FilterAndMergeSJtab_vars.outdir, 6 | maxram : "31000000000", // around 30Gb for mammals 7 | bufsize : "150000000", // buffer size 8 | mm : "2", // number of mismatches allowed 9 | multimap : "10", // max multimap positions per read 10 | minintro : "21", // minimum intron size 11 | filter_sec: true, // filter out secondary alignments from the bam file? 12 | samtools_threads: Integer.toString(ESSENTIAL_THREADS), 13 | overhang : Integer.toString(ESSENTIAL_READLENGTH - 1), 14 | extra : "" 15 | ] 16 | 17 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/star2pass.groovy" 18 | 19 | // expected parameter types 20 | class STAR_pe_2nd_vars_schema { 21 | String outdir 22 | String logdir 23 | String threads 24 | String ref 25 | String maxram 26 | String bufsize 27 | String mm 28 | String multimap 29 | String minintro 30 | Boolean filter_sec 31 | String samtools_threads 32 | String overhang 33 | String extra 34 | 35 | // check for the presence of mandatory params 36 | boolean asBoolean() { 37 | outdir && ref 38 | } 39 | } 40 | 41 | validate_schema(STAR_pe_2nd_vars_schema, STAR_pe_2nd_vars) 42 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/variantCall_HC.groovy: -------------------------------------------------------------------------------- 1 | VariantCallHC = { 2 | doc title: "GATK HaplotypeCaller", 3 | desc: "Call variants using GATK HaplotypeCaller.", 4 | constraints: "GATK version >= 3.5", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = VariantCallHC_vars.outdir 8 | 9 | def HaplotypeCaller_FLAGS = 10 | " -dontUseSoftClippedBases" + 11 | (VariantCallHC_vars.threads ? " -nct " + VariantCallHC_vars.threads : "" ) + 12 | (VariantCallHC_vars.gatk_ref ? " -R " + VariantCallHC_vars.gatk_ref : "" ) + 13 | (VariantCallHC_vars.vcf_ref ? " --dbsnp " + VariantCallHC_vars.vcf_ref : "" ) + 14 | (VariantCallHC_vars.min_score_call ? " -stand_call_conf " + VariantCallHC_vars.min_score_call : "") + 15 | (VariantCallHC_vars.min_score_emit ?
" -stand_emit_conf " + VariantCallHC_vars.min_score_emit : "") 16 | 17 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 18 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | transform (".rg.duprm.split.recalibrated.bam") to (".HC.vcf.gz") { 22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | java ${VariantCallHC_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T HaplotypeCaller $HaplotypeCaller_FLAGS -I $input -o $output 27 | ""","VariantCallHC" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/variantCall_HC.header: -------------------------------------------------------------------------------- 1 | VariantCallHC_vars=[ 2 | outdir : RESULTS + "/HC", 3 | java_flags : "-Xmx2400m", 4 | gatk_ref : ESSENTIAL_GENOME_REF, 5 | vcf_ref : ESSENTIAL_VCF_REF, 6 | threads : Integer.toString(ESSENTIAL_THREADS), 7 | min_score_call: 20, 8 | min_score_emit: 20 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/variantCall_HC.groovy" 12 | 13 | // expected parameter types 14 | class VariantCallHC_vars_schema { 15 | String outdir 16 | String java_flags 17 | String gatk_ref 18 | String vcf_ref 19 | String threads 20 | Integer min_score_call 21 | Integer min_score_emit 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && min_score_call >= 0 && min_score_emit >= 0 26 | } 27 | } 28 | 29 | validate_schema(VariantCallHC_vars_schema, VariantCallHC_vars) 30 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/variant_filtration.groovy: -------------------------------------------------------------------------------- 1 | VariantFiltration = { 2 | doc title: "GATK HaplotypeCaller", 3 | desc: "Filter variants following bast practices:http://gatkforums.broadinstitute.org/gatk/discussion/3891/calling-variants-in-rnaseq. Note that values are hardcoded.", 4 | constraints: "GATK version >= 3.5", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = VariantFiltration_vars.outdir 8 | 9 | def VariantFiltration_FLAGS = 10 | " -window 35" + 11 | " -cluster 3" + 12 | " -filterName FS" + 13 | " -filter \"FS > 30.0\"" + 14 | " -filterName QD" + 15 | " -filter \"QD < 2.0\"" + 16 | (VariantFiltration_vars.ref ? 
" -R " + VariantFiltration_vars.ref : "") 17 | 18 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 19 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 20 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 21 | 22 | transform (".vcf.gz") to (".filtered.vcf.gz") { 23 | exec """ 24 | ${TOOL_ENV} && 25 | ${PREAMBLE} && 26 | 27 | java ${VariantFiltration_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T VariantFiltration -V $input -o $output $VariantFiltration_FLAGS 28 | ""","VariantFiltration" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/variant_filtration.header: -------------------------------------------------------------------------------- 1 | VariantFiltration_vars=[ 2 | outdir : RESULTS + "/HC", 3 | java_flags: "-Xmx2400m", 4 | ref : ESSENTIAL_GENOME_REF 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/variant_filtration.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/breaktag/bwa.groovy: -------------------------------------------------------------------------------- 1 | bwa = { 2 | doc title: "BWA SR/PE alignment", 3 | desc: "Align SR/PE reads with BWA", 4 | constraints: "none", 5 | author: "Sergi Sayols" 6 | 7 | output.dir = BWA_vars.outdir 8 | 9 | def File f = new File(input1) 10 | def OUTFILE = (f.getName() =~ /(.R1)*.filt.fastq.gz/).replaceFirst(".bam") 11 | 12 | def BWA_INPUT = (BWA_vars.paired ? "$input1 $input2" : "$input") 13 | def BWA_FLAGS = 14 | (BWA_vars.threads ? " -t " + BWA_vars.threads : "" ) + 15 | (BWA_vars.extra ? " " + BWA_vars.extra : "" ) 16 | 17 | def SAMTOOLS_VIEW_FLAGS = "-bhSu" + 18 | (BWA_vars.minqual ? " -q " + BWA_vars.minqual : "") 19 | def SAMTOOLS_SORT_FLAGS = 20 | (BWA_vars.samtools_threads ? 
" -@ " + BWA_vars.samtools_threads : "" ) 21 | 22 | def TOOL_ENV = prepare_tool_env("bwa", tools["bwa"]["version"], tools["bwa"]["runenv"]) + " && " + 23 | prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 24 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 25 | 26 | produce(OUTFILE) { 27 | exec """ 28 | ${TOOL_ENV} && 29 | ${PREAMBLE} && 30 | 31 | bwa mem $BWA_FLAGS $BWA_vars.ref $BWA_INPUT | \ 32 | samtools view $SAMTOOLS_VIEW_FLAGS - | \ 33 | samtools sort $SAMTOOLS_SORT_FLAGS -T \${TMP}/${OUTFILE}_sort - > ${output} && 34 | 35 | samtools index ${output} 36 | ""","BWA_pe" 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /modules/breaktag/bwa.header: -------------------------------------------------------------------------------- 1 | BWA_vars=[ 2 | outdir : MAPPED, 3 | paired : RUN_IN_PAIRED_END_MODE, 4 | ref : ESSENTIAL_BWA_REF, 5 | threads : Integer.toString(ESSENTIAL_THREADS), 6 | minqual : Integer.toString(ESSENTIAL_QUALITY), 7 | samtools_threads: Integer.toString(ESSENTIAL_THREADS), 8 | extra : "-v 1" // output only errors to stderr 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/breaktag/bwa.groovy" 12 | 13 | // expected parameter types 14 | class BWA_vars_schema { 15 | String outdir 16 | Boolean paired 17 | String ref 18 | String threads 19 | String minqual 20 | String samtools_threads 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && ref && minqual.toInteger() >= 0 26 | } 27 | } 28 | 29 | validate_schema(BWA_vars_schema, BWA_vars) 30 | -------------------------------------------------------------------------------- /modules/breaktag/collect_stats.groovy: -------------------------------------------------------------------------------- 1 | collect_stats = { 2 | doc title: "collect stats", 3 | desc: "collect breaktag DSB stats", 4 | constraints: "none", 5 | author: "Sergi Sayols" 6 | 7 | output.dir = collect_stats_vars.outdir 8 | 9 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 10 | 11 | transform(".strandless.bed.gz") to(".txt") { 12 | exec """ 13 | ${PREAMBLE} && 14 | zcat $input | awk '{i+=\$5} END {print "breaks: ", i; print "loci: ", NR;}' > $output 15 | """ 16 | } 17 | forward input 18 | } 19 | 20 | -------------------------------------------------------------------------------- /modules/breaktag/collect_stats.header: -------------------------------------------------------------------------------- 1 | collect_stats_vars=[ 2 | outdir: RESULTS + "/stats" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/breaktag/collect_stats.groovy" 6 | 7 | // expected parameter types 8 | class collect_stats_vars_schema { 9 | String outdir 10 | 11 | // check for the presence of mandatory params 12 | boolean asBoolean() { 13 | outdir 14 | } 15 | } 16 | 17 | validate_schema(collect_stats_vars_schema, collect_stats_vars) 18 | -------------------------------------------------------------------------------- /modules/breaktag/count_breaks.header: -------------------------------------------------------------------------------- 1 | count_breaks_vars=[ 2 | outdir : RESULTS + "/counts", 3 | paired : RUN_IN_PAIRED_END_MODE, 4 | threads : Integer.toString(ESSENTIAL_THREADS) 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/breaktag/count_breaks.groovy" 8 | 9 | // expected parameter types 10 | class count_breaks_vars_schema { 11 | String outdir 
12 | Boolean paired 13 | String threads 14 | 15 | // check for the presence of mandatory params 16 | boolean asBoolean() { 17 | outdir 18 | } 19 | } 20 | 21 | validate_schema(count_breaks_vars_schema, count_breaks_vars) 22 | -------------------------------------------------------------------------------- /modules/breaktag/count_breaks_strandless.groovy: -------------------------------------------------------------------------------- 1 | count_breaks_strandless = { 2 | doc title: "Count breaks per position", 3 | desc: "Count breaks regardless of the strand to which the reads assign them", 4 | constraints: "Expects perl to be installed", 5 | author: "Sergi Sayols" 6 | 7 | output.dir = count_breaks_strandless_vars.outdir 8 | 9 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 10 | 11 | transform(".bed.gz") to(".strandless.bed.gz") { 12 | exec """ 13 | ${PREAMBLE} && 14 | 15 | zcat $input | \ 16 | perl -aln -e 'if(\$F[0]==\$F0[0] && \$F[1]==\$F0[1] && \$F[2]==\$F0[2]){ \$F0[4]+=\$F[4]; } else { \$F0[5]="*"; print join("\t", @F0); @F0=@F; } END{ \$F[5]="*"; print join("\t", @F) }' | \ 17 | tail -n +2 | \ 18 | gzip -c > $output 19 | ""","count_breaks_strandless" 20 | } 21 | } 22 | 23 | 24 | -------------------------------------------------------------------------------- /modules/breaktag/count_breaks_strandless.header: -------------------------------------------------------------------------------- 1 | count_breaks_strandless_vars=[ 2 | outdir: RESULTS + "/counts" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/breaktag/count_breaks_strandless.groovy" 6 | 7 | // expected parameter types 8 | class count_breaks_strandless_vars_schema { 9 | String outdir 10 | 11 | // check for the presence of mandatory params 12 | boolean asBoolean() { 13 | outdir 14 | } 15 | } 16 | 17 | validate_schema(count_breaks_strandless_vars_schema, count_breaks_strandless_vars) 18 | -------------------------------------------------------------------------------- /modules/breaktag/pattern_filtering.header: -------------------------------------------------------------------------------- 1 | pattern_filtering_vars=[ 2 | outdir : RAWDATA + "/filt", 3 | paired : RUN_IN_PAIRED_END_MODE, 4 | targets: TARGETS 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/breaktag/pattern_filtering.groovy" 8 | 9 | // expected parameter types 10 | class pattern_filtering_vars_schema { 11 | String outdir 12 | Boolean paired 13 | String targets 14 | 15 | // check for the presence of mandatory params 16 | boolean asBoolean() { 17 | outdir && targets 18 | } 19 | } 20 | 21 | validate_schema(pattern_filtering_vars_schema, pattern_filtering_vars) 22 | -------------------------------------------------------------------------------- /modules/miscellaneous/collect_tool_versions.groovy: -------------------------------------------------------------------------------- 1 | collectToolVersions = { 2 | doc title: "collectToolVersions", 3 | desc: "so far, a dumb dump of the `tools` map", 4 | constraints: "needs the tool map defined in PIPELINE_ROOT/pipelines//tools.groovy", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Sergi Sayols" 7 | 8 | output.dir = collectToolVersions_vars.outdir 9 | 10 | produce("tool_versions.txt") { 11 | File f = new File(collectToolVersions_vars.outdir + "/tool_versions.txt") 12 | f.write "tool\tenv\tversion\n" 13 | tools.each { tool, x -> f << "$tool\t$x.runenv\t$x.version\n" } 14 | } 15 | } 16 | 17 |
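For reference, a minimal sketch of the `tools` map shape that collectToolVersions consumes. The entry names and values below are illustrative assumptions; the real map comes from the pipeline's tools.groovy config, but the per-entry `version`/`runenv` keys are implied by the `prepare_tool_env(name, tools[name]["version"], tools[name]["runenv"])` calls used throughout the modules.

    // hypothetical example of the `tools` map (actual content comes from config/tools.groovy)
    tools = [
        samtools: [version: "1.9", runenv: "conda"],
        gatk    : [version: "3.5", runenv: "conda"]
    ]
    // with this map, collectToolVersions writes a tool_versions.txt like:
    // tool        env     version
    // samtools    conda   1.9
    // gatk        conda   3.5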
-------------------------------------------------------------------------------- /modules/miscellaneous/collect_tool_versions.header: -------------------------------------------------------------------------------- 1 | collectToolVersions_vars=[ 2 | outdir: LOGS 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/miscellaneous/collect_tool_versions.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/scRNAseq/CRmotifCounts.header: -------------------------------------------------------------------------------- 1 | CRmotifCounts_vars=[ 2 | outdir : RESULTS + "/CRmotifCounts", 3 | project : PROJECT, 4 | res : RESULTS, 5 | cellranger_aggr_id : (binding.variables.containsKey("cellranger_aggr_vars") ? cellranger_aggr_vars.id : (binding.variables.containsKey("cellrangeratac_aggr_vars") ? cellrangeratac_aggr_vars.id : (binding.variables.containsKey("cellrangerarc_aggr_vars") ? cellrangerarc_aggr_vars.id : "aggr"))), // folder name for results from cellranger / cellranger-atac / cellranger-arc aggr 6 | extra : "" //extra params to send to the tool 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/scRNAseq/CRmotifCounts.groovy" 10 | 11 | // expected parameter types 12 | class CRmotifCounts_vars_schema { 13 | String outdir 14 | String project 15 | String res 16 | String cellranger_aggr_id 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir 22 | } 23 | } 24 | 25 | validate_schema(CRmotifCounts_vars_schema, CRmotifCounts_vars) 26 | -------------------------------------------------------------------------------- /modules/scRNAseq/CTannoMarker.header: -------------------------------------------------------------------------------- 1 | CTannoMarker_vars=[ 2 | outdir : RESULTS + "/CTanno", 3 | project : PROJECT, 4 | res : RESULTS, 5 | assay : (RUN_BATCHCORRECT ? "integrated" : "SCT"), 6 | clusterVar : "clusters_wnn", 7 | dbfile : "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx", // file should contain five columns (tissueType - tissue type, cellName - cell type, geneSymbolmore1 - positive marker genes, geneSymbolmore2 - marker genes not expected to be expressed by a cell type, shortName - short form of cell type) 8 | tissue : "Brain", // e.g. Immune system, Pancreas, Liver, Eye, Kidney, Brain, Lung, Adrenal, Heart, Intestine, Muscle, Placenta, Spleen, Stomach, Thymus 9 | ctcolumn : "shortName", // Can be any column-name available in the dbfile, which should contain the celltype names in full form or as an abbreviation.
10 | extra : "" //extra parms to sent to the tool 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/scRNAseq/CTannoMarker.groovy" 14 | 15 | // expected parameter types 16 | class CTannoMarker_vars_schema { 17 | String outdir 18 | String project 19 | String res 20 | String assay 21 | String clusterVar 22 | String dbfile 23 | String tissue 24 | String ctcolumn 25 | String extra 26 | 27 | // check for the presence of mandatory params 28 | boolean asBoolean() { 29 | outdir 30 | } 31 | } 32 | 33 | validate_schema(CTannoMarker_vars_schema, CTannoMarker_vars) 34 | -------------------------------------------------------------------------------- /modules/scRNAseq/DNAaccess.header: -------------------------------------------------------------------------------- 1 | DNAaccess_vars=[ 2 | outdir : RESULTS + "/DNAaccess", 3 | project : PROJECT, 4 | res : RESULTS, 5 | featureCutoff : "q5", // either percentile specified as 'q' followed by the minimum percentile or minimum number of cells containing the feature 6 | skipFirstLSIcomp : "1", // If we see a very strong correlation between the first LSI component and read depth, this component should be removed. 7 | extra : "" //extra parms to sent to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/DNAaccess.groovy" 11 | 12 | // expected parameter types 13 | class DNAaccess_vars_schema { 14 | String outdir 15 | String project 16 | String res 17 | String featureCutoff 18 | String skipFirstLSIcomp 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir && project && res 24 | } 25 | } 26 | 27 | validate_schema(DNAaccess_vars_schema, DNAaccess_vars) 28 | 29 | -------------------------------------------------------------------------------- /modules/scRNAseq/SCTransform.groovy: -------------------------------------------------------------------------------- 1 | SCTransform = { 2 | doc title: "SCTransform", 3 | desc: "normalize gene expression data in Seurat object", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Frank Rühle" 7 | 8 | output.dir = SCTransform_vars.outdir 9 | 10 | def SCTransform_FLAGS = 11 | (SCTransform_vars.outdir ? " outdir=" + SCTransform_vars.outdir : "") + 12 | (SCTransform_vars.project ? " project=" + SCTransform_vars.project : "") + 13 | (SCTransform_vars.res ? " res=" + SCTransform_vars.res : "") + 14 | (SCTransform_vars.extra ? " " + SCTransform_vars.extra : "") 15 | 16 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 18 | 19 | // The SCTransform module is not using any of its inputs, but needs to check their 20 | // time stamp in order to know, if SCTransform should run (in case of pre-existing 21 | // results). This can be done by outputting/echo'ing all inputs. 
In order not to 22 | // confuse the pipeline user, this output is written to /dev/null 23 | // --- THE echo COMMAND BELOW MUST NOT BE REMOVED --- 24 | 25 | // run the chunk 26 | produce("SCTransform.RData") { 27 | exec """ 28 | ${TOOL_ENV} && 29 | ${PREAMBLE} && 30 | echo $inputs > /dev/null && 31 | 32 | Rscript ${PIPELINE_ROOT}/tools/sc_norm/SCTransform.R $SCTransform_FLAGS 33 | ""","SCTransform" 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /modules/scRNAseq/SCTransform.header: -------------------------------------------------------------------------------- 1 | SCTransform_vars=[ 2 | outdir : RESULTS + "/SCTransform", 3 | project : PROJECT, 4 | res : RESULTS, 5 | extra : "" //extra params to send to the tool 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/scRNAseq/SCTransform.groovy" 9 | 10 | // expected parameter types 11 | class SCTransform_vars_schema { 12 | String outdir 13 | String project 14 | String res 15 | String extra 16 | 17 | // check for the presence of mandatory params 18 | boolean asBoolean() { 19 | outdir && project && res 20 | } 21 | } 22 | 23 | validate_schema(SCTransform_vars_schema, SCTransform_vars) 24 | -------------------------------------------------------------------------------- /modules/scRNAseq/addumibarcodetofastq.groovy: -------------------------------------------------------------------------------- 1 | AddUMIBarcodeToFastq = { 2 | doc title: "Adds UMI and Barcode to the fastq header", 3 | desc: "adds UMI and barcode of the second read in MARS-Seq samples to the fastq header using umitools", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.2", 6 | author: "Nastasja Kreim, Frank Rühle" 7 | 8 | output.dir = AddUMIBarcodeToFastq_vars.outdir 9 | 10 | def File f = new File(input1) 11 | def OUTPUTFILE = (f.getName() =~ /(.R1)*.fastq.gz/).replaceFirst("") 12 | 13 | def umi_tools_FLAGS = 14 | (AddUMIBarcodeToFastq_vars.bcpattern ? " --bc-pattern=" + AddUMIBarcodeToFastq_vars.bcpattern : "") + 15 | (AddUMIBarcodeToFastq_vars.barcodelist ? " --whitelist=" + AddUMIBarcodeToFastq_vars.barcodelist + " --filter-cell-barcode" : "") + 16 | (AddUMIBarcodeToFastq_vars.extra ? " " + AddUMIBarcodeToFastq_vars.extra : "") 17 | 18 | def TOOL_ENV = prepare_tool_env("umitools", tools["umitools"]["version"], tools["umitools"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | produce(OUTPUTFILE + ".umibarcode.fastq.gz"){ 22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | umi_tools extract $umi_tools_FLAGS -I $input2 --stdout \${TMP}/\$(basename ${input2.prefix}).barcode.fastq.gz --read2-in $input1 --read2-out=\${TMP}/\$(basename ${OUTPUTFILE}).umibarcode.fastq.gz && 27 | 28 | rm \${TMP}/\$(basename ${input2.prefix}).barcode.fastq.gz && 29 | mv \${TMP}/\$(basename ${OUTPUTFILE}).umibarcode.fastq.gz $output 30 | ""","AddUMIBarcodeToFastq" 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /modules/scRNAseq/addumibarcodetofastq.header: -------------------------------------------------------------------------------- 1 | AddUMIBarcodeToFastq_vars=[ 2 | outdir : PROJECT + "/rawdata_processed", 3 | bcpattern : ESSENTIAL_BCPATTERN, // pattern of the umi and the barcode in the second read.
The Cs are the barcode bases, the Ns are the UMI bases 4 | barcodelist: ESSENTIAL_WHITELIST, // list of valid barcodes 5 | extra : "" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/scRNAseq/addumibarcodetofastq.groovy" 9 | 10 | // expected parameter types 11 | class AddUMIBarcodeToFastq_vars_schema { 12 | String outdir 13 | String bcpattern 14 | String barcodelist 15 | String extra 16 | 17 | // check for the presence of mandatory params 18 | boolean asBoolean() { 19 | outdir && bcpattern 20 | } 21 | } 22 | 23 | validate_schema(AddUMIBarcodeToFastq_vars_schema, AddUMIBarcodeToFastq_vars) 24 | -------------------------------------------------------------------------------- /modules/scRNAseq/assignSouporcellCluster.header: -------------------------------------------------------------------------------- 1 | assignSouporcellCluster_vars=[ 2 | outdir : RESULTS + "/demux_gt" + "/assignSouporcellCluster", 3 | souporcelldir : RESULTS + "/demux_gt", 4 | targets : "targets.txt", //targets file 5 | extra : "" // extra parameter 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/scRNAseq/assignSouporcellCluster.groovy" 9 | 10 | // expected parameter types 11 | class assignSouporcellCluster_vars_schema { 12 | String outdir 13 | String souporcelldir 14 | String targets 15 | String extra 16 | 17 | // check for the presence of mandatory params 18 | boolean asBoolean() { 19 | outdir && souporcelldir && targets 20 | } 21 | } 22 | 23 | validate_schema(assignSouporcellCluster_vars_schema, assignSouporcellCluster_vars) 24 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellranger_aggr.header: -------------------------------------------------------------------------------- 1 | cellranger_aggr_vars=[ 2 | outdir : RESULTS, 3 | id : "aggr", 4 | normalize : "mapped", // "none" or "mapped" (default). Equalize the average mapped read depth per cell between GEM wells by sub-sampling before merging.
5 | cores : "8", 6 | localmem : "64", 7 | extra : "" // extra params to send to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/cellranger_aggr.groovy" 11 | 12 | // expected parameter types 13 | class cellranger_aggr_vars_schema { 14 | String outdir 15 | String id 16 | String normalize 17 | String cores 18 | String localmem 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir 24 | } 25 | } 26 | 27 | validate_schema(cellranger_aggr_vars_schema, cellranger_aggr_vars) 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellranger_count.header: -------------------------------------------------------------------------------- 1 | cellranger_count_vars=[ 2 | outdir : MAPPED, 3 | transcriptome : ESSENTIAL_TENX_TRANSCRIPTOME, 4 | expect_cells : ESSENTIAL_TENX_EXPECTED_CELLS, 5 | nuclei : (ESSENTIAL_TENX_NUCLEI == "yes"), 6 | cores : "8", 7 | localmem : "64", 8 | extra : " --chemistry=SC3Pv3" // extra params to send to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/cellranger_count.groovy" 12 | 13 | // expected parameter types 14 | class cellranger_count_vars_schema { 15 | String outdir 16 | String transcriptome 17 | Integer expect_cells 18 | Boolean nuclei 19 | String cores 20 | String localmem 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && transcriptome 26 | } 27 | } 28 | 29 | validate_schema(cellranger_count_vars_schema, cellranger_count_vars) 30 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellrangerarc_aggr.header: -------------------------------------------------------------------------------- 1 | cellrangerarc_aggr_vars=[ 2 | outdir : RESULTS, 3 | reference : ESSENTIAL_TENX_REFERENCE, 4 | id : "aggr", 5 | normalize : "depth", // "none" or "depth" (default). Equalize the average mapped read depth per cell between GEM wells by sub-sampling before merging.
6 | cores : "8", 7 | localmem : "64", 8 | extra : "" // extra params to send to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangerarc_aggr.groovy" 12 | 13 | // expected parameter types 14 | class cellrangerarc_aggr_vars_schema { 15 | String outdir 16 | String reference 17 | String id 18 | String normalize 19 | String cores 20 | String localmem 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && reference 26 | } 27 | } 28 | 29 | validate_schema(cellrangerarc_aggr_vars_schema, cellrangerarc_aggr_vars) 30 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellrangerarc_count.header: -------------------------------------------------------------------------------- 1 | cellrangerarc_count_vars=[ 2 | outdir : MAPPED, 3 | reference : ESSENTIAL_TENX_REFERENCE, 4 | fastqdir : ESSENTIAL_TENX_FASTQDIR, 5 | cores : "32", 6 | localmem : "128", 7 | extra : "" // extra params to send to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangerarc_count.groovy" 11 | 12 | // expected parameter types 13 | class cellrangerarc_count_vars_schema { 14 | String outdir 15 | String reference 16 | String fastqdir 17 | String cores 18 | String localmem 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir && reference && fastqdir 24 | } 25 | } 26 | 27 | validate_schema(cellrangerarc_count_vars_schema, cellrangerarc_count_vars) 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellrangeratac_aggr.header: -------------------------------------------------------------------------------- 1 | cellrangeratac_aggr_vars=[ 2 | outdir : RESULTS, 3 | id : "aggr", 4 | reference : ESSENTIAL_TENX_REFERENCE, 5 | normalize : "depth", // "none" or "depth" (default). Equalize the average mapped read depth per cell between GEM wells by sub-sampling before merging.
6 | cores : "8", 7 | localmem : "64", 8 | extra : "" // extra params to send to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangeratac_aggr.groovy" 12 | 13 | // expected parameter types 14 | class cellrangeratac_aggr_vars_schema { 15 | String outdir 16 | String id 17 | String reference 18 | String normalize 19 | String cores 20 | String localmem 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && reference 26 | } 27 | } 28 | 29 | validate_schema(cellrangeratac_aggr_vars_schema, cellrangeratac_aggr_vars) 30 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellrangeratac_count.header: -------------------------------------------------------------------------------- 1 | cellrangeratac_count_vars=[ 2 | outdir : MAPPED, 3 | reference : ESSENTIAL_TENX_REFERENCE, 4 | cores : "8", 5 | localmem : "64", 6 | extra : "" // extra params to send to the tool 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangeratac_count.groovy" 10 | 11 | // expected parameter types 12 | class cellrangeratac_count_vars_schema { 13 | String outdir 14 | String reference 15 | String cores 16 | String localmem 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir && reference 22 | } 23 | } 24 | 25 | validate_schema(cellrangeratac_count_vars_schema, cellrangeratac_count_vars) 26 | -------------------------------------------------------------------------------- /modules/scRNAseq/demux_gt.header: -------------------------------------------------------------------------------- 1 | demux_gt_vars=[ 2 | outdir : RESULTS + "/demux_gt", 3 | targets : "targets.txt", //targets file 4 | ref : ESSENTIAL_TENX_TRANSCRIPTOME + "/fasta/genome.fa", // reference genome 5 | cellranger_output : MAPPED, // result files from cellranger to demultiplex 6 | threads : "30", 7 | extra : "" // extra parameter to send to Cite-Seq-Count 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/demux_gt.groovy" 11 | 12 | // expected parameter types 13 | class demux_gt_vars_schema { 14 | String outdir 15 | String targets 16 | String ref 17 | String cellranger_output 18 | String threads 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir && targets && ref && cellranger_output 24 | } 25 | } 26 | 27 | validate_schema(demux_gt_vars_schema, demux_gt_vars) 28 | 29 | -------------------------------------------------------------------------------- /modules/scRNAseq/diffPeaks.header: -------------------------------------------------------------------------------- 1 | diffPeaks_vars=[ 2 | outdir : RESULTS + "/DNAaccess", 3 | project : PROJECT, 4 | res : RESULTS, 5 | assay : "ATAC", 6 | minCells : "20", // minimum number of cells to include a cluster in the analysis 7 | clusterVar : "clusters_wnn", // variable name with clustering info for comparison by group and cluster ("clusters_atac" or "clusters_wnn") 8 | CTannoSelected : ESSENTIAL_CELLTYPE_ANNO[0], // select celltype annotation column for comparison by group and celltype 9 | test : "LR", // statistical test to use. One of "wilcox", "bimod", "roc", "t", "negbinom", "poisson", "LR", "MAST" and "DESeq2". 10 | latentVars : "nFeature_ATAC", // Variables to test, used only when test is one of 'LR', 'negbinom', 'poisson' or 'MAST'.
11 | extra : "" //extra params to send to the tool 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/scRNAseq/diffPeaks.groovy" 15 | 16 | // expected parameter types 17 | class diffPeaks_vars_schema { 18 | String outdir 19 | String project 20 | String res 21 | String assay 22 | String minCells 23 | String clusterVar 24 | String CTannoSelected 25 | String test 26 | String latentVars 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | outdir && project && res && CTannoSelected 32 | } 33 | } 34 | 35 | validate_schema(diffPeaks_vars_schema, diffPeaks_vars) 36 | 37 | -------------------------------------------------------------------------------- /modules/scRNAseq/motifActivity.header: -------------------------------------------------------------------------------- 1 | motifActivity_vars=[ 2 | outdir : RESULTS + "/motifActivity", 3 | project : PROJECT, 4 | res : RESULTS, 5 | db : ESSENTIAL_DB, 6 | clusterVar : "clusters_wnn", // variable name with clustering info for comparison by group and cluster ("clusters_rna" or "clusters_wnn") 7 | CTannoSelected : ESSENTIAL_CELLTYPE_ANNO[0], // select celltype annotation column for comparison by group and celltype 8 | motif2plot : "MA0497.1", // optional motifs for activity plots. Give either a single motif name or a path to a txt file with multiple motifs one per line. 9 | extra : "" //extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/scRNAseq/motifActivity.groovy" 13 | 14 | // expected parameter types 15 | class motifActivity_vars_schema { 16 | String outdir 17 | String project 18 | String res 19 | String db 20 | String clusterVar 21 | String CTannoSelected 22 | String motif2plot 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && project && res && CTannoSelected 28 | } 29 | } 30 | 31 | validate_schema(motifActivity_vars_schema, motifActivity_vars) 32 | -------------------------------------------------------------------------------- /modules/scRNAseq/motifEnrich.header: -------------------------------------------------------------------------------- 1 | motifEnrich_vars=[ 2 | outdir : RESULTS + "/motifEnrichment", 3 | project : PROJECT, 4 | res : RESULTS, 5 | db : ESSENTIAL_DB, 6 | diffPeaks_dir : (binding.variables.containsKey("diffPeaks_vars") ? diffPeaks_vars.outdir : ""), 7 | pval_thresh : "0.05", // p-value threshold of differentially accessible peaks to include 8 | min_peaks : "5", // skip enrichment if fewer peaks are available in the comparison 9 | extra : "" //extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/scRNAseq/motifEnrich.groovy" 13 | 14 | // expected parameter types 15 | class motifEnrich_vars_schema { 16 | String outdir 17 | String project 18 | String res 19 | String db 20 | String diffPeaks_dir 21 | String pval_thresh 22 | String min_peaks 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && project && res 28 | } 29 | } 30 | 31 | validate_schema(motifEnrich_vars_schema, motifEnrich_vars) 32 | -------------------------------------------------------------------------------- /modules/scRNAseq/peaks2genes.header: -------------------------------------------------------------------------------- 1 | peaks2genes_vars=[ 2 | outdir : RESULTS + "/peaks2genes", 3 | project : PROJECT, 4 | res : RESULTS, 5 | db : ESSENTIAL_DB, 6 | genes2use : "'c(\"HMCN1\", \"EVL\", \"EML1\")'", // Genes to test.
If empty, use all genes from the expression assay. 7 | genes2plot : "HMCN1", // Genes defining the region to show in the coverage plot 8 | groupCellsInPlot : "clusters_wnn", // name of metadata columns to group (color) the cells by in the Coverage plot (e.g. "clusters_wnn") 9 | plotUpstream : "100000", // Number of bases to extend the plotting region upstream. 10 | plotDownstream : "100000", // Number of bases to extend the plotting region downstream. 11 | extra : "" //extra params to send to the tool 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/scRNAseq/peaks2genes.groovy" 15 | 16 | // expected parameter types 17 | class peaks2genes_vars_schema { 18 | String outdir 19 | String project 20 | String res 21 | String db 22 | String genes2use 23 | String genes2plot 24 | String groupCellsInPlot 25 | String plotUpstream 26 | String plotDownstream 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | outdir && project && res 32 | } 33 | } 34 | 35 | validate_schema(peaks2genes_vars_schema, peaks2genes_vars) 36 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_filter.header: -------------------------------------------------------------------------------- 1 | sc_filter_vars=[ 2 | outdir : QC + "/sc_qc", 3 | project : PROJECT, 4 | res : RESULTS, 5 | nCount_ATAC_min : "1000", 6 | nCount_ATAC_max : "50000", 7 | nCount_RNA_min : "1000", 8 | nCount_RNA_max : "30000", 9 | FRiPmin : "0.3", // Fraction of reads in peaks (min threshold) 10 | FRiBLmax : "0.05", // Fraction of reads in blacklisted regions (max threshold) 11 | nucleosome_sig_max : "2", // nucleosome signal (max threshold) 12 | TSS_enrich_min : "1", // TSS enrichment (min threshold) 13 | MT_perc_max : "20", // mitochondrial RNA count percentage (max threshold) 14 | extra : "" // extra params to send to the tool 15 | ] 16 | 17 | load PIPELINE_ROOT + "/modules/scRNAseq/sc_filter.groovy" 18 | 19 | // expected parameter types 20 | class sc_filter_vars_schema { 21 | String outdir 22 | String project 23 | String res 24 | String nCount_ATAC_min 25 | String nCount_ATAC_max 26 | String nCount_RNA_min 27 | String nCount_RNA_max 28 | String FRiPmin 29 | String FRiBLmax 30 | String nucleosome_sig_max 31 | String TSS_enrich_min 32 | String MT_perc_max 33 | String extra 34 | 35 | // check for the presence of mandatory params 36 | boolean asBoolean() { 37 | outdir && project && res 38 | } 39 | } 40 | 41 | validate_schema(sc_filter_vars_schema, sc_filter_vars) 42 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_integrateATAC.header: -------------------------------------------------------------------------------- 1 | sc_integrateATAC_vars=[ 2 | outdir : RESULTS + "/sc_integrateATAC", 3 | project : PROJECT, 4 | res : RESULTS, 5 | featureCutoff : (binding.variables.containsKey("DNAaccess_vars") ? DNAaccess_vars.featureCutoff : "q5"), // either percentile specified as 'q' followed by the minimum percentile or minimum number of cells containing the feature 6 | skipFirstLSIcomp: (binding.variables.containsKey("DNAaccess_vars") ? DNAaccess_vars.skipFirstLSIcomp : "1"), // If we see a very strong correlation between the first LSI component and read depth, this component should be removed. 7 | extra : "" //extra params to send to the tool 8 | ] 9 | // please keep in mind that the results of batch-correction or integration are supposed to be used only to define the clusters.
ALL downstream steps like DE analysis should use the RNA or SCT slots of the seurat object 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/sc_integrateATAC.groovy" 12 | 13 | // expected parameter types 14 | class sc_integrateATAC_vars_schema { 15 | String outdir 16 | String project 17 | String res 18 | String featureCutoff 19 | String skipFirstLSIcomp 20 | String extra 21 | 22 | // check for the presence of mandatory params 23 | boolean asBoolean() { 24 | outdir && project && res 25 | } 26 | } 27 | 28 | validate_schema(sc_integrateATAC_vars_schema, sc_integrateATAC_vars) 29 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_integrateRNA.header: -------------------------------------------------------------------------------- 1 | sc_integrateRNA_vars=[ 2 | outdir : RESULTS + "/sc_integrateRNA", 3 | project : PROJECT, 4 | res : RESULTS, 5 | batch : "group", // Provide the column name from the targets file. If more than one batch needs to be addressed, please create a concatenated name of both and provide that as a column in the targets file and here. 6 | n_features : "3000", // Number of variable features to identify for integration (or batch-correcting) 7 | rdtype : "cca", // Dimensional reduction to perform when finding anchors. Can be one of: cca, rpca, rlsi 8 | extra : "" //extra params to send to the tool 9 | ] 10 | // please keep in mind that the results of batch-correction or integration are supposed to be used only to define the clusters. ALL downstream steps like DE analysis should use the RNA or SCT slots of the seurat object 11 | 12 | load PIPELINE_ROOT + "/modules/scRNAseq/sc_integrateRNA.groovy" 13 | 14 | // expected parameter types 15 | class sc_integrateRNA_vars_schema { 16 | String outdir 17 | String project 18 | String res 19 | String batch 20 | String n_features 21 | String rdtype 22 | String extra 23 | 24 | // check for the presence of mandatory params 25 | boolean asBoolean() { 26 | outdir && project && res && batch 27 | } 28 | } 29 | 30 | validate_schema(sc_integrateRNA_vars_schema, sc_integrateRNA_vars) 31 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_qc.groovy: -------------------------------------------------------------------------------- 1 | sc_qc = { 2 | doc title: "sc_qc", 3 | desc: "Quality control for single cell multiome experiment", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Frank Rühle" 7 | 8 | output.dir = sc_qc_vars.outdir 9 | 10 | def sc_qc_FLAGS = 11 | (sc_qc_vars.outdir ? " outdir=" + sc_qc_vars.outdir : "") + 12 | (sc_qc_vars.project ? " project=" + sc_qc_vars.project : "") + 13 | (sc_qc_vars.res ? " res=" + sc_qc_vars.res : "") + 14 | (sc_qc_vars.db ? " db=" + sc_qc_vars.db : "") + 15 | (sc_qc_vars.extra ? " " + sc_qc_vars.extra : "") 16 | 17 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 18 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 19 | 20 | // The sc_qc module does not use any of its inputs directly, but needs to check 21 | // their time stamps to know whether sc_qc should run (in case of pre-existing 22 | // results). This can be done by outputting/echo'ing all inputs.
In order not to 23 | // confuse the pipeline user, this output is written to /dev/null 24 | // --- THE echo COMMAND BELOW MUST NOT BE REMOVED --- 25 | 26 | // run the chunk 27 | produce("sc_qc.RData") { 28 | exec """ 29 | ${TOOL_ENV} && 30 | ${PREAMBLE} && 31 | echo $inputs > /dev/null && 32 | 33 | Rscript ${PIPELINE_ROOT}/tools/sc_qc/sc_qc_multiome.R $sc_qc_FLAGS 34 | ""","sc_qc" 35 | } 36 | } 37 | 38 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_qc.header: -------------------------------------------------------------------------------- 1 | sc_qc_vars=[ 2 | outdir : QC + "/sc_qc", 3 | project : PROJECT, 4 | res : RESULTS, //where the results lie 5 | db : ESSENTIAL_DB, 6 | extra : "" //extra params to send to the tool 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/scRNAseq/sc_qc.groovy" 10 | 11 | // expected parameter types 12 | class sc_qc_vars_schema { 13 | String outdir 14 | String project 15 | String res 16 | String db 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir && project && res 22 | } 23 | } 24 | 25 | validate_schema(sc_qc_vars_schema, sc_qc_vars) 26 | -------------------------------------------------------------------------------- /modules/scRNAseq/splitpipe_all.header: -------------------------------------------------------------------------------- 1 | splitpipe_all_vars=[ 2 | outdir : MAPPED, 3 | logdir : LOGS + "/splitpipe_all", 4 | targets : "targets.txt", // targets file describing the samples 5 | genome : ESSENTIAL_TENX_REFERENCE, 6 | chemistry : "v2", 7 | expect_cells : ESSENTIAL_TENX_EXPECTED_CELLS, 8 | threads : "8", 9 | extra : " " // extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/scRNAseq/splitpipe_all.groovy" 13 | 14 | // expected parameter types 15 | class splitpipe_all_vars_schema { 16 | String outdir 17 | String logdir 18 | String targets 19 | String genome 20 | String chemistry 21 | Integer expect_cells 22 | String threads 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && genome 28 | } 29 | } 30 | 31 | validate_schema(splitpipe_all_vars_schema, splitpipe_all_vars) 32 | -------------------------------------------------------------------------------- /modules/scRNAseq/splitpipe_comb.groovy: -------------------------------------------------------------------------------- 1 | splitpipe_comb = { 2 | doc title: "split-pipe comb", 3 | desc: "Combining multiple samples with split-pipe comb", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Frank Rühle" 7 | 8 | output.dir = splitpipe_comb_vars.outdir + "/" 9 | def indir = splitpipe_comb_vars.indir + "/" 10 | 11 | def TOOL_ENV = prepare_tool_env("split_pipe", tools["split_pipe"]["version"], tools["split_pipe"]["runenv"]) 12 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 13 | 14 | produce("splitpipe_comb.done") { 15 | exec """ 16 | ${TOOL_ENV} && 17 | ${PREAMBLE} && 18 | 19 | split-pipe --mode comb --output_dir $output.dir --sublibraries \$(dirname $inputs.bam) && 20 | 21 | touch $output 22 | 23 | ""","splitpipe_comb" 24 | } 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/splitpipe_comb.header: -------------------------------------------------------------------------------- 1 | splitpipe_comb_vars=[ 2 | outdir : RESULTS + "/aggr/", 3 | indir :
MAPPED, 4 | extra : "" // extra params to send to the tool 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/scRNAseq/splitpipe_comb.groovy" 8 | 9 | // expected parameter types 10 | class splitpipe_comb_vars_schema { 11 | String outdir 12 | String indir 13 | String extra 14 | 15 | // check for the presence of mandatory params 16 | boolean asBoolean() { 17 | outdir && indir 18 | } 19 | } 20 | 21 | validate_schema(splitpipe_comb_vars_schema, splitpipe_comb_vars) 22 | -------------------------------------------------------------------------------- /modules/scRNAseq/subread.header: -------------------------------------------------------------------------------- 1 | subread_count_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | stranded: ESSENTIAL_STRANDED, //whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | paired : (ESSENTIAL_PAIRED == "yes"), //paired end design 5 | genesgtf: ESSENTIAL_GENESGTF, 6 | threads : ESSENTIAL_THREADS, 7 | extra : "" // extra params to send to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/subread.groovy" 11 | 12 | // expected parameter types 13 | class subread_count_vars_schema { 14 | String outdir 15 | String stranded 16 | Boolean paired 17 | String genesgtf 18 | Integer threads 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir 24 | } 25 | } 26 | 27 | validate_schema(subread_count_vars_schema, subread_count_vars) 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/umicount.groovy: -------------------------------------------------------------------------------- 1 | umicount = { 2 | doc title: "Deduplication and Counting reads per gene", 3 | desc: "Deduplication and counting of mapped data and splitting according to cell barcode with umi_tools", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = umicount_vars.outdir 9 | 10 | // create the log folder if it doesn't exist 11 | def umicount_LOGDIR = new File(umicount_vars.logdir) 12 | if (!umicount_LOGDIR.exists()) { 13 | umicount_LOGDIR.mkdirs() 14 | } 15 | 16 | def umicount_FLAGS = 17 | (umicount_vars.verbose ? "--verbose=1 " : "") + 18 | (umicount_vars.paired ? "--paired " : "") + 19 | (umicount_vars.param ? " " + umicount_vars.param : "") + 20 | (umicount_vars.extra ? " " + umicount_vars.extra : "") 21 | 22 | def TOOL_ENV = prepare_tool_env("umitools", tools["umitools"]["version"], tools["umitools"]["runenv"]) 23 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 24 | 25 | // run the chunk 26 | transform(".bam") to (".umicount.tsv.gz") { 27 | def SAMPLENAME = input.prefix 28 | exec """ 29 | ${TOOL_ENV} && 30 | ${PREAMBLE} && 31 | 32 | SAMPLENAME_BASE=\$(basename ${SAMPLENAME}) && 33 | umi_tools count $umicount_FLAGS -I $input -S $output -L ${umicount_LOGDIR}/\${SAMPLENAME_BASE}.umicount.log -E ${umicount_LOGDIR}/\${SAMPLENAME_BASE}.umicount.error 34 | ""","umicount" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /modules/scRNAseq/umicount.header: -------------------------------------------------------------------------------- 1 | umicount_vars=[ 2 | outdir: RESULTS + "/umicount", 3 | logdir: LOGS + "/umicount", 4 | log : true, 5 | paired: (ESSENTIAL_PAIRED == "yes"), 6 | //This assumes that the labeling is done on the bam file e.g.
by processing with featureCounts beforehand 7 | //Additionally this is configured to fit marsseq parameters. It might be 8 | //necessary to add --read-length if you want to ensure that not only the position 9 | //+ UMI is used to deduplicate but also the read-length. For marsseq this option 10 | //is not set because we expect reads with the same umi+starting position to be 11 | //PCR duplicates even if they are of different length 12 | param : "--per-gene --gene-tag=XT --assigned-status-tag=XS --per-cell --wide-format-cell-counts", 13 | extra : "--edit-distance-threshold=0 " //Spliced reads are treated differently from unspliced 14 | ] 15 | 16 | load PIPELINE_ROOT + "/modules/scRNAseq/umicount.groovy" 17 | 18 | // expected parameter types 19 | class umicount_vars_schema { 20 | String outdir 21 | String logdir 22 | Boolean log 23 | Boolean paired 24 | String param 25 | String extra 26 | 27 | // check for the presence of mandatory params 28 | boolean asBoolean() { 29 | outdir && logdir 30 | } 31 | } 32 | 33 | validate_schema(umicount_vars_schema, umicount_vars) 34 | 35 | -------------------------------------------------------------------------------- /modules/scRNAseq/umidedup.groovy: -------------------------------------------------------------------------------- 1 | umidedup = { 2 | doc title: "deduplication based on UMIs", 3 | desc: "Deduplication of mapped data using UMIs with umi_tools", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = umidedup_vars.outdir 9 | def umidedup_FLAGS = 10 | (umidedup_vars.verbose ? "--verbose=1 " : "") + 11 | (umidedup_vars.paired ? "--paired " : "") + 12 | (umidedup_vars.param ? " " + umidedup_vars.param : "") + 13 | (umidedup_vars.extra ? " " + umidedup_vars.extra : "") 14 | 15 | def TOOL_ENV = prepare_tool_env("umitools", tools["umitools"]["version"], tools["umitools"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | // run the chunk 19 | transform(".bam") to (".umidedup.bam") { 20 | exec """ 21 | ${TOOL_ENV} && 22 | ${PREAMBLE} && 23 | 24 | umi_tools dedup $umidedup_FLAGS -I $input -S $output --output-stats=${output.prefix}.stats 25 | ""","umidedup" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/umidedup.header: -------------------------------------------------------------------------------- 1 | umidedup_vars=[ 2 | outdir: RESULTS+ "/umidedup", 3 | log : "--verbose=1", 4 | //this assumes that the labeling is done on the bam file e.g. by processing with featureCounts beforehand 5 | //additionally this is configured to fit marsseq parameters. It might be 6 | //necessary to add --read-length if you want to ensure that not only the position 7 | //+ UMI is used to deduplicate but also the read-length. For marsseq this option
For marsseq this opition 8 | //is not set because we expect reads with the same umi+starting position to be 9 | //PCR duplicates event if they are of different length 10 | param: "--per-cell", 11 | extra: " --spliced-is-unique --edit-distance-threshold=0 " //Spliced reads are treated different from unspliced 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/scRNAseq/umidedup.groovy" 15 | 16 | // expected parameter types 17 | class umidedup_vars_schema { 18 | String outdir 19 | String log 20 | String param 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir 26 | } 27 | } 28 | 29 | validate_schema(umidedup_vars_schema, umidedup_vars) 30 | 31 | -------------------------------------------------------------------------------- /modules/scRNAseq/wnn.header: -------------------------------------------------------------------------------- 1 | wnn_vars=[ 2 | outdir : RESULTS + "/wnn", 3 | project : PROJECT, 4 | res : RESULTS, 5 | knn : "20", // the number of multimodal neighbors to compute. 20 by default 6 | knnRange : "200", // The number of approximate neighbors to compute. 200 by default 7 | clusterAlg : "1", // Algorithm for modularity optimization (1 = Louvain; 2 = Louvain with multilevel refinement; 3 = SLM; 4 = Leiden). 8 | clusterRes : "0.8", // resolution parameter above (below) 1.0 for larger (smaller) number of communities (default 0.8). 9 | skipFirstLSIcomp : DNAaccess_vars.skipFirstLSIcomp, // skip first LSI component(s) if selected in DNAaccess.header 10 | batchCorrection : RUN_BATCHCORRECT, // We use this to determine in the WNN step which dimensionality reductions to use. 11 | extra : "" //extra parms to sent to the tool 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/scRNAseq/wnn.groovy" 15 | 16 | // expected parameter types 17 | class wnn_vars_schema { 18 | String outdir 19 | String project 20 | String res 21 | String knn 22 | String knnRange 23 | String clusterAlg 24 | String clusterRes 25 | String skipFirstLSIcomp 26 | Boolean batchCorrection 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | outdir && project && res 32 | } 33 | } 34 | 35 | validate_schema(wnn_vars_schema, wnn_vars) 36 | -------------------------------------------------------------------------------- /modules/smallRNAseq/bowtie1.header: -------------------------------------------------------------------------------- 1 | bowtie1_sRNA_vars=[ 2 | mapped : MAPPED, // output dir 3 | logdir : LOGS_MY + "/mapping", // mapping logs 4 | threads : Integer.toString(ESSENTIAL_THREADS), // threads to use 5 | samtools_threads: Integer.toString(ESSENTIAL_THREADS), 6 | ref : ESSENTIAL_BOWTIE_REF, // prefix of the bowtie reference genome 7 | mm : "1", // number of mismatches allowed 8 | multireport : "1", // if a read has more than reportable alignments, one is reported at random. 9 | best : true, // bowtie best mode (implies --best --strata --tryhard). Doesn't apply to PE 10 | quals : "--phred33-quals", // phred33-quals. 
Use --phred64-quals for old sequencing runs 11 | extra : "" 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/smallRNAseq/bowtie1.groovy" 15 | 16 | // expected parameter types 17 | class bowtie1_vars_schema { 18 | String mapped 19 | String logdir 20 | String threads 21 | String samtools_threads 22 | String ref 23 | String mm 24 | String multireport 25 | Boolean best 26 | String quals 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | mapped && ref 32 | } 33 | } 34 | 35 | validate_schema(bowtie1_vars_schema, bowtie1_sRNA_vars) 36 | -------------------------------------------------------------------------------- /modules/smallRNAseq/dedup.header: -------------------------------------------------------------------------------- 1 | FilterDuplicates_vars=[ 2 | outdir: TRIMMED, 3 | logdir: LOGS_MY + "/removeDup" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/smallRNAseq/dedup.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/smallRNAseq/fastq_quality_filter.groovy: -------------------------------------------------------------------------------- 1 | FastQQualityFilter = { 2 | doc title: "Remove low-quality sequences", 3 | desc: "filter out reads containing low-quality bases (Phred score below 20) in order to facilitate PCR duplicate removal.", 4 | constraints: "Only supports compressed FASTQ files", 5 | author: "Antonio Domingues, Anke Busch" 6 | 7 | output.dir = FastQQualityFilter_vars.outdir 8 | 9 | // create the log folder if it doesn't exist 10 | def FASTQ_QUALITY_FILTER_LOGDIR = new File(FastQQualityFilter_vars.logdir) 11 | if (!FASTQ_QUALITY_FILTER_LOGDIR.exists()) { 12 | FASTQ_QUALITY_FILTER_LOGDIR.mkdirs() 13 | } 14 | 15 | def FASTQ_QUALITY_FILTER_FLAGS= 16 | (FastQQualityFilter_vars.min_qual ? " -q " + FastQQualityFilter_vars.min_qual : "") + 17 | (FastQQualityFilter_vars.min_percent ? " -p " + FastQQualityFilter_vars.min_percent : "") + 18 | (FastQQualityFilter_vars.qual_format ? " -Q " + FastQQualityFilter_vars.qual_format : "") + 19 | (FastQQualityFilter_vars.extra ?
" " + FastQQualityFilter_vars.extra : "") 20 | 21 | def TOOL_ENV = prepare_tool_env("fastx", tools["fastx"]["version"], tools["fastx"]["runenv"]) 22 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 23 | 24 | transform(".fastq.gz") to (".highQ.fastq.gz") { 25 | def SAMPLENAME = input.prefix.prefix 26 | exec """ 27 | ${TOOL_ENV} && 28 | ${PREAMBLE} && 29 | 30 | SAMPLENAME_BASE=\$(basename ${SAMPLENAME}) && 31 | zcat $input | fastq_quality_filter $FASTQ_QUALITY_FILTER_FLAGS -o $output 2>&1 >> ${FastQQualityFilter_vars.logdir}/\${SAMPLENAME_BASE}.fastq_quality_filter.log 32 | ""","FastQQualityFilter" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /modules/smallRNAseq/fastq_quality_filter.header: -------------------------------------------------------------------------------- 1 | FastQQualityFilter_vars=[ 2 | outdir : TRIMMED, 3 | logdir : LOGS_MY + "/filterQuality", 4 | min_qual : ESSENTIAL_MINIMAL_QUAL, // minimal quality of bases in reads to be kept 5 | min_percent: "100", // percentage of bases fulfilling the minimal quality requirement 6 | qual_format: "33", // format of the quality scores 7 | extra : "-v -z" 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/smallRNAseq/fastq_quality_filter.groovy" 11 | 12 | -------------------------------------------------------------------------------- /modules/smallRNAseq/filter2htseq.groovy: -------------------------------------------------------------------------------- 1 | filter2htseq = { 2 | doc title: "filter2htseq", 3 | desc: "filter featureCount output to fit HTSeq format, extract column 1 and 7 as well as skipping the header", 4 | constraints: "none.", 5 | author: "Oliver Drechsel, Antonio Domingues, Anke Busch" 6 | 7 | var subdir : "" 8 | output.dir = filter2htseq_vars.outdir + "/$subdir" 9 | 10 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 11 | 12 | transform(".raw_readcounts.tsv") to (".readcounts.tsv") { 13 | exec """ 14 | ${PREAMBLE} && 15 | 16 | tail -n +3 $input | awk '{print \$1\"\\t\"\$7}' > $output 17 | ""","filter2htseq" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /modules/smallRNAseq/filter2htseq.header: -------------------------------------------------------------------------------- 1 | filter2htseq_vars=[ 2 | outdir: RESULTS + "/subread-count" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/smallRNAseq/filter2htseq.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/smallRNAseq/filter_smallrna_counts.groovy: -------------------------------------------------------------------------------- 1 | filter_smallRNA_counts = { 2 | doc title: "filter_smallRNA_counts", 3 | desc: "Extract count of a selected type of smallRNAs to separate count table files", 4 | constraints: "based on subread (featurecounts) run", 5 | author: "Anke Busch" 6 | 7 | var subdir : "" 8 | output.dir = filter_smallRNA_counts_vars.outdir + "/$subdir" 9 | 10 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".readcounts.tsv") to ("." 
+ filter_smallRNA_counts_vars.smallrna + ".readcounts.tsv") { 14 | 15 | exec """ 16 | 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | Rscript ${PIPELINE_ROOT}/tools/smallRNA_BCF/extract_smallRNA.R gtf=$filter_smallRNA_counts_vars.genesgtf input=$input outdir=$output.dir type=$filter_smallRNA_counts_vars.type smallrna=$filter_smallRNA_counts_vars.smallrna 21 | 22 | ""","filter_smallRNA_counts" 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /modules/smallRNAseq/filter_smallrna_counts.header: -------------------------------------------------------------------------------- 1 | filter_smallRNA_counts_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | genesgtf : ESSENTIAL_GENESGTF, 4 | type : ESSENTIAL_FEATURETYPE, // describes type of RNA in gtf, gencode uses gene_type, ensembl uses gene_biotype 5 | smallrna : ESSENTIAL_SMALLRNA // type of smallRNA to be analyzed 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/smallRNAseq/filter_smallrna_counts.groovy" 9 | 10 | 11 | -------------------------------------------------------------------------------- /modules/smallRNAseq/mirDeep2.groovy: -------------------------------------------------------------------------------- 1 | miRDeep2 = { 2 | doc title: "miRDeep2", 3 | desc: """Quantification of miRNAs performed in 2 steps: (1) process reads and map them to the reference genome; (2) quantify miRNA expression. This is step 2.""", 4 | constraints: "Requires mirDeep2.", 5 | author: "Antonio Domingues, Anke Busch" 6 | 7 | def EXP = input1.split("/")[-1].replaceAll(".arf", "") 8 | output.dir = miRDeep2_vars.outdir + "/" + EXP 9 | 10 | def TOOL_ENV = prepare_tool_env("mirdeep2", tools["mirdeep2"]["version"], tools["mirdeep2"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".arf", ".fa") to (".tmp") { 14 | exec """ 15 | ${TOOL_ENV} && 16 | ${PREAMBLE} && 17 | 18 | reads_fa=`realpath $input2`; 19 | genome_fa=`realpath $miRDeep2_vars.genome_seq`; 20 | reads_vs_genome_arf=`realpath $input1`; 21 | mature_ref_miRNAs_fa=`realpath $miRDeep2_vars.mature_mirna`; 22 | mature_other_miRNAs_fa="none"; 23 | hairpin_ref_miRNAs=`realpath $miRDeep2_vars.hairpin_mirna`; 24 | 25 | mkdir -p $output.dir && 26 | cd $output.dir && 27 | 28 | miRDeep2.pl \$reads_fa \$genome_fa \$reads_vs_genome_arf \$mature_ref_miRNAs_fa \$mature_other_miRNAs_fa \$hairpin_ref_miRNAs -t $miRDeep2_vars.species -c -d -v -r ${EXP} -z ".${EXP}" 2> ${output.dir}/${EXP}.report.log && 29 | touch \$(basename $output) 30 | ""","miRDeep2" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /modules/smallRNAseq/mirDeep2.header: -------------------------------------------------------------------------------- 1 | miRDeep2_vars=[ 2 | outdir : RESULTS + "/miRDeep2", 3 | genome_seq : ESSENTIAL_GENOME_REF, 4 | mature_mirna : ESSENTIAL_MATURE_MIRNA, 5 | hairpin_mirna: ESSENTIAL_HAIRPIN_MIRNA, 6 | species : ESSENTIAL_SPECIES 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/smallRNAseq/mirDeep2.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/smallRNAseq/mirDeep2_mapper.groovy: -------------------------------------------------------------------------------- 1 | miRDeep2Mapper = { 2 | doc title: "miRDeep2", 3 | desc: "Quantification of miRNAs performed in 2 steps: (1) process reads and map them to the reference genome; (2) quantify miRNA expression. This is step 1.", 4
| constraints: "Requires mirDeep2.", 5 | author: "Antonio Domingues, Anke Busch" 6 | 7 | output.dir = miRDeep2Mapper_vars.outdir 8 | 9 | def MIRDEEP2MAPPER_FLAGS= 10 | (miRDeep2Mapper_vars.genome_ref ? " -p " + miRDeep2Mapper_vars.genome_ref : "") + 11 | (miRDeep2Mapper_vars.extra ? " " + miRDeep2Mapper_vars.extra : "") 12 | 13 | def TOOL_ENV = prepare_tool_env("mirdeep2", tools["mirdeep2"]["version"], tools["mirdeep2"]["runenv"]) 14 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 15 | 16 | transform(".fastq.gz") to (".arf", ".fa") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | x="\${TMP}/\$(basename $input.prefix)" && 22 | gzip -cd $input > \$x && 23 | mapper.pl \$x $MIRDEEP2MAPPER_FLAGS -s $output2 -t $output1 &> ${output2.prefix}.mapper.log && 24 | rm \$x 25 | ""","miRDeep2Mapper" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /modules/smallRNAseq/mirDeep2_mapper.header: -------------------------------------------------------------------------------- 1 | miRDeep2Mapper_vars=[ 2 | outdir : RESULTS + "/miRDeep2", 3 | genome_ref: ESSENTIAL_BOWTIE_REF, 4 | extra : "-e " + // input file is in fastq format (see the mapper.pl docs) 5 | "-h " + // parse to fasta format 6 | "-m " + // collapse identical reads 7 | "-i " + // convert RNA to DNA alphabet (to map against the genome) 8 | "-j " + // remove entries with non-canonical letters 9 | "-o 8" // number of threads to use for bowtie 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/smallRNAseq/mirDeep2_mapper.groovy" 13 | 14 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread.groovy: -------------------------------------------------------------------------------- 1 | subread_count = { 2 | doc title: "subread_count", 3 | desc: "Counting reads in features with featureCounts from the subread package", 4 | constraints: "Default: strand specific counting.", 5 | author: "Oliver Drechsel, Antonio Domingues, Anke Busch" 6 | 7 | var subdir : "" 8 | output.dir = subread_count_vars.outdir + "/$subdir" 9 | 10 | def SUBREAD_FLAGS = 11 | "--donotsort " + 12 | (subread_count_vars.threads ? " -T " + subread_count_vars.threads : "") + 13 | (subread_count_vars.genesgtf ? " -a " + subread_count_vars.genesgtf : "") + 14 | (subread_count_vars.count_multimapping ? " -M " : "") + 15 | (subread_count_vars.count_ambiguous ? " -O " : "") + 16 | (subread_count_vars.feature ? " -t " + subread_count_vars.feature : "") + 17 | (subread_count_vars.attribute? " -g " + subread_count_vars.attribute: "") + 18 | (subread_count_vars.extra ? " " + subread_count_vars.extra : "") + 19 | (subread_count_vars.stranded == "no" ? " -s0 " : (subread_count_vars.stranded == "yes" ? " -s1 " : " -s2 ")) 20 | 21 | def TOOL_ENV = prepare_tool_env("subread", tools["subread"]["version"], tools["subread"]["runenv"]) 22 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 23 | 24 | // run the chunk 25 | transform(".bam") to (".raw_readcounts.tsv") { 26 | exec """ 27 | ${TOOL_ENV} && 28 | ${PREAMBLE} && 29 | 30 | featureCounts $SUBREAD_FLAGS -o $output $input 2> ${output.prefix}_subreadlog.stderr 31 | ""","subread_count" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread.header: -------------------------------------------------------------------------------- 1 | subread_count_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | stranded : ESSENTIAL_STRANDED, // whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | count_multimapping: true, // also count multi-mapping reads 5 | count_ambiguous : true, // also count ambiguous reads 6 | feature : "exon", // annotation feature to count mapped reads ("exon" by default) 7 | attribute : "gene_id", // annotation attribute type used to group features ("gene_id" by default) 8 | genesgtf : ESSENTIAL_GENESGTF, 9 | threads : Integer.toString(ESSENTIAL_THREADS), 10 | extra : " " // extra params to send to the tool 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/smallRNAseq/subread.groovy" 14 | 15 | // expected parameter types 16 | class subread_count_vars_schema { 17 | String outdir 18 | String stranded 19 | Boolean count_multimapping 20 | Boolean count_ambiguous 21 | String feature 22 | String attribute 23 | String genesgtf 24 | String threads 25 | String extra 26 | 27 | // check for the presence of mandatory params 28 | boolean asBoolean() { 29 | outdir && genesgtf && stranded 30 | } 31 | } 32 | 33 | validate_schema(subread_count_vars_schema, subread_count_vars) 34 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread2rnatypes.header: -------------------------------------------------------------------------------- 1 | subread2rnatypes_vars=[ 2 | outdir : QC + "/RNAtypes", 3 | stranded : ESSENTIAL_STRANDED, //whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | paired : (ESSENTIAL_PAIRED == "yes"), //paired end design 5 | genesgtf : ESSENTIAL_GENESGTF, 6 | feature : "exon", // type of feature to be counted 7 | accumulate: ESSENTIAL_FEATURETYPE, // type of annotation counts should be accumulated on. Usually that would be gene_id, but in this case we choose gene_biotype 8 | threads : Integer.toString(ESSENTIAL_THREADS), 9 | extra : "-M -O " // extra params to send to the tool (-M also count multi-mapping reads, -O also count ambiguous reads) 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/RNAseq/subread2rnatypes.groovy" 13 | 14 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread_mirnamature.groovy: -------------------------------------------------------------------------------- 1 | subread_miRNAmature_count = { 2 | doc title: "subread_miRNAmature_count", 3 | desc: "Counting reads on mature miRNAs with featureCounts from the subread package", 4 | constraints: "miRNA gff (from miRBase) needs to be available.", 5 | author: "Anke Busch" 6 | 7 | var subdir : "" 8 | output.dir = subread_miRNAmature_count_vars.outdir + "/$subdir" 9 | 10 | def SUBREAD_MIRNAMATURE_FLAGS = 11 | "--donotsort " + 12 | (subread_miRNAmature_count_vars.threads ? " -T " + subread_miRNAmature_count_vars.threads : "") + 13 | (subread_miRNAmature_count_vars.genesgff ? " -a " + subread_miRNAmature_count_vars.genesgff : "") + 14 | (subread_miRNAmature_count_vars.count_multimapping ? " -M " : "") + 15 | (subread_miRNAmature_count_vars.feature ? " -t " + subread_miRNAmature_count_vars.feature : "") + 16 | (subread_miRNAmature_count_vars.attribute? " -g " + subread_miRNAmature_count_vars.attribute: "") + 17 | (subread_miRNAmature_count_vars.extra ? " " + subread_miRNAmature_count_vars.extra : "") + 18 | (subread_miRNAmature_count_vars.stranded == "no" ? " -s0 " : (subread_miRNAmature_count_vars.stranded == "yes" ? " -s1 " : " -s2 ")) 19 | 20 | def TOOL_ENV = prepare_tool_env("subread", tools["subread"]["version"], tools["subread"]["runenv"]) 21 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 22 | 23 | // run the chunk 24 | transform(".bam") to (".miRNAmature.raw_readcounts.tsv") { 25 | exec """ 26 | ${TOOL_ENV} && 27 | ${PREAMBLE} && 28 | 29 | featureCounts $SUBREAD_MIRNAMATURE_FLAGS -o $output $input 2> ${output.prefix}_subreadlog.stderr 30 | ""","subread_miRNAmature_count" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread_mirnamature.header: -------------------------------------------------------------------------------- 1 | subread_miRNAmature_count_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | stranded : ESSENTIAL_STRANDED, // whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | count_multimapping: true, // also count multi-mapping reads 5 | feature : "miRNA", // annotation feature to count mapped reads ("exon" by default) 6 | attribute : "Name", // annotation attribute type used to group features ("gene_id" by default) 7 | genesgff : ESSENTIAL_MIRNAGFF, 8 | threads : Integer.toString(ESSENTIAL_THREADS), 9 | extra : " " // extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/smallRNAseq/subread_mirnamature.groovy" 13 | 14 | // expected parameter types 15 | class subread_miRNAmature_count_vars_schema { 16 | String outdir 17 | String stranded 18 | Boolean count_multimapping 19 | String feature 20 | String attribute 21 | String genesgff 22 | String threads 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && genesgff && stranded 28 | } 29 | } 30 | 31 | validate_schema(subread_miRNAmature_count_vars_schema, subread_miRNAmature_count_vars) 32 | -------------------------------------------------------------------------------- /modules/smallRNAseq/trim_umis.groovy: -------------------------------------------------------------------------------- 1 | TrimUMIs = { 2 | doc title: "Trim UMIs", 3 | desc: """Trims the random barcodes (UMIs) flanking the insert in adapter-removed reads (NNNN-insert-NNNN), which help in the identification of PCR duplicates.""", 4 | constraints: "Requires seqtk.", 5 | author: "Antonio Domingues, Anke Busch" 6 | 7 | output.dir = TrimUMIs_vars.outdir 8 | 9 | def TRIMFQ_FLAGS = 10 | (TrimUMIs_vars.left_trim ? " -b " + TrimUMIs_vars.left_trim : "") + 11 | (TrimUMIs_vars.right_trim ?
" -e " + TrimUMIs_vars.right_trim : "") 12 | 13 | def TOOL_ENV = prepare_tool_env("seqtk", tools["seqtk"]["version"], tools["seqtk"]["runenv"]) 14 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 15 | 16 | transform(".fastq.gz") to (".trimmed.fastq.gz") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | seqtk trimfq $TRIMFQ_FLAGS $input | gzip > $output 22 | ""","TrimUMIs" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /modules/smallRNAseq/trim_umis.header: -------------------------------------------------------------------------------- 1 | TrimUMIs_vars=[ 2 | outdir : TRIMMED, 3 | left_trim : ESSENTIAL_UMI_LENGTH_LEFT, 4 | right_trim: ESSENTIAL_UMI_LENGTH_RIGHT 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/smallRNAseq/trim_umis.groovy" 8 | 9 | -------------------------------------------------------------------------------- /pipelines/ChIPseq/contrasts_diffbind.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix sub_experiment 2 | mutvswt (mut_Pol2-wt_Pol2) ~group 1 3 | -------------------------------------------------------------------------------- /pipelines/ChIPseq/targets.txt: -------------------------------------------------------------------------------- 1 | IP IPname INPUT INPUTname group Replicate PeakCaller 2 | wt_Pol2_1 wt_Pol2_1 wt_Input_1 wt_Input_1 wt_Pol2 1 macs 3 | wt_Pol2_2 wt_Pol2_2 wt_Input_2 wt_Input_2 wt_Pol2 2 macs 4 | mut_Pol2_1 mut_Pol2_1 mut_Input_1 mut_Input_1 mut_Pol2 1 macs 5 | mut_Pol2_2 mut_Pol2_2 mut_Input_2 mut_Input_2 mut_Pol2 2 macs 6 | -------------------------------------------------------------------------------- /pipelines/ChIPseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 
14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ ] 16 | 17 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 18 | tools.putAll(tools_custom) // override with users custom versions/runenvs 19 | -------------------------------------------------------------------------------- /pipelines/DNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file 2 | sample_1 filename1 3 | sample_2 filename2 4 | 5 | -------------------------------------------------------------------------------- /pipelines/DNAseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ ] 16 | 17 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 18 | tools.putAll(tools_custom) // override with users custom versions/runenvs 19 | -------------------------------------------------------------------------------- /pipelines/RNAseq/contrasts.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix 2 | KO.vs.WT (KO-WT) ~group 3 | -------------------------------------------------------------------------------- /pipelines/RNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file group replicate 2 | mut_1 mut_1_f.readcounts.tsv mut 1 3 | mut_2 mut_2_f.readcounts.tsv mut 2 4 | wt_1 wt_1_f.readcounts.tsv wt 1 5 | wt_2 wt_2_f.readcounts.tsv wt 2 6 | -------------------------------------------------------------------------------- /pipelines/RNAseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ ] 16 | 17 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 18 | tools.putAll(tools_custom) // override with users custom versions/runenvs 19 | -------------------------------------------------------------------------------- /pipelines/RNAseqVariantCalling/README.md: -------------------------------------------------------------------------------- 1 | **Important NOTE: this is a legacy pipeline using GATK3, which is now obsolete. 
There are no plans to upgrade the pipeline to GATK4, and it is unsupported by BCF. Use the pipeline at your own risk.** 2 | 3 | # SNP calling on RNAseq data pipeline 4 | 5 | ### What will it do 6 | 7 | Heavily undocumented, but it will identify SNPs from RNA-seq data using GATK best practices. Features a 2-step mapping approach with STAR. 8 | 9 | To be documented at a later stage. 10 | 11 | ### Prerequisites 12 | 13 | - picard tools (> 1.119) 14 | - GenomeAnalysisTK version 3. **Please note it is obsolete** 15 | -------------------------------------------------------------------------------- /pipelines/RNAseqVariantCalling/essential.vars.groovy: -------------------------------------------------------------------------------- 1 | //Pipeline GATK RNA-seq variant calling 2 | ESSENTIAL_PROJECT="/local/scratch1/imb-kettinggr/adomingues/projects/snps-splicing" 3 | ESSENTIAL_STAR_REF="/fsimb/groups/imb-kettinggr/genomes/Danio_rerio/Ensembl/Zv10/Sequence/StarIndex2_4_1d_modified/" 4 | ESSENTIAL_GENOME_REF="/fsimb/groups/imb-kettinggr/genomes/Danio_rerio/Ensembl/Zv10/Sequence/chr_sequences/chr.clean.fa" 5 | ESSENTIAL_VCF_REF="/home/adomingu/imb-kettinggr/genomes/Danio_rerio/Ensembl/Zv10/Annotation/variation/Danio_rerio.vcf.gz" 6 | ESSENTIAL_READLENGTH=101 7 | ESSENTIAL_THREADS=4 8 | 9 | //global vars that will be reused in some global vars 10 | PROJECT=ESSENTIAL_PROJECT 11 | LOGS=PROJECT + "/logs" 12 | MAPPED=PROJECT + "/mapped" 13 | QC=PROJECT + "/qc" 14 | REPORTS=PROJECT + "/reports" 15 | RESULTS=PROJECT + "/results" 16 | TMP=PROJECT + "/tmp" 17 | TRACKS=MAPPED + "/tracks" 18 | 19 | // optional pipeline stages to include 20 | -------------------------------------------------------------------------------- /pipelines/RNAseqVariantCalling/rnaseq_variant_calling.pipeline.groovy: -------------------------------------------------------------------------------- 1 | PIPELINE="RNAseqVariantCalling" 2 | PIPELINE_VERSION="1.0" 3 | PIPELINE_ROOT="./NGSpipe2go/" // adjust to your project's needs 4 | 5 | load PIPELINE_ROOT + "/pipelines/RNAseqVariantCalling/essential.vars.groovy" 6 | load PIPELINE_ROOT + "/pipelines/RNAseqVariantCalling/tools.groovy" 7 | load PIPELINE_ROOT + "/config/preambles.groovy" 8 | load PIPELINE_ROOT + "/config/bpipe.config.groovy" 9 | load PIPELINE_ROOT + "/config/validate_module_params.groovy" 10 | 11 | load PIPELINE_ROOT + "/modules/NGS/bamindexer.header" 12 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/star1pass.header" 13 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/merge_SJ_tab.header" 14 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/star2pass.header" 15 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/add_read_group.header" 16 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/mark_dups.header" 17 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/splitNcigar.header" 18 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/base_recalibration.header" 19 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/create_star_index_sjdb.header" 20 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/variantCall_HC.header" 21 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/variant_filtration.header" 22 | load PIPELINE_ROOT + "/modules/miscellaneous/collect_tool_versions.header" 23 | 24 | //MAIN PIPELINE TASK 25 | dontrun = { println "didn't run $module" } 26 | 27 | Bpipe.run { 28 | "%R*.fastq.gz" * [ STAR_pe ] + 29 | "*.SJ.out.tab" * [ FilterAndMergeSJtab + GenerateStarIndexFromSJ ] + 30 | "%R*.fastq.gz" * [ 31 | STAR_pe_2nd + AddRG +
MarkDups + SplitNCigarReads + BaseRecalibration + VariantCallHC + VariantFiltration 32 | ] + collectToolVersions 33 | } 34 | -------------------------------------------------------------------------------- /pipelines/RNAseqVariantCalling/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ 16 | gatk: [ runenv: "lmod", version: "3.4-46" ] 17 | ] 18 | 19 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 20 | tools.putAll(tools_custom) // override with users custom versions/runenvs 21 | -------------------------------------------------------------------------------- /pipelines/breaktag/breaktag.pipeline.groovy: -------------------------------------------------------------------------------- 1 | PIPELINE="breaktag" 2 | PIPELINE_VERSION="1.0.0" 3 | PIPELINE_ROOT="./NGSpipe2go" 4 | 5 | load PIPELINE_ROOT + "/pipelines/breaktag/essential.vars.groovy" 6 | load PIPELINE_ROOT + "/pipelines/breaktag/tools.groovy" 7 | load PIPELINE_ROOT + "/config/preambles.groovy" 8 | load PIPELINE_ROOT + "/config/bpipe.config.groovy" 9 | load PIPELINE_ROOT + "/config/validate_module_params.groovy" 10 | 11 | load PIPELINE_ROOT + "/modules/breaktag/pattern_filtering.header" 12 | load PIPELINE_ROOT + "/modules/breaktag/bwa.header" 13 | load PIPELINE_ROOT + "/modules/breaktag/count_breaks.header" 14 | load PIPELINE_ROOT + "/modules/breaktag/count_breaks_strandless.header" 15 | load PIPELINE_ROOT + "/modules/breaktag/collect_stats.header" 16 | load PIPELINE_ROOT + "/modules/NGS/fastqc.header" 17 | load PIPELINE_ROOT + "/modules/NGS/multiqc.header" 18 | load PIPELINE_ROOT + "/modules/miscellaneous/collect_tool_versions.header" 19 | 20 | //MAIN PIPELINE TASK 21 | dontrun = { println "didn't run $module" } 22 | collect_bams = { forward inputs.bam } 23 | 24 | Bpipe.run { 25 | (RUN_IN_PAIRED_END_MODE ? "%.R*.fastq.gz" : "%.fastq.gz") * [ 26 | FastQC, 27 | pattern_filtering + 28 | bwa + 29 | count_breaks + 30 | count_breaks_strandless + 31 | collect_stats 32 | ] + 33 | collectToolVersions + 34 | MultiQC 35 | } 36 | -------------------------------------------------------------------------------- /pipelines/breaktag/essential.vars.groovy: -------------------------------------------------------------------------------- 1 | // breaktag ESSENTIAL VARIABLES 2 | 3 | // Define essential variables here. 4 | // Further module-specific variables can be adjusted in the corresponding ".header" files for each module. 
5 | // 6 | 7 | // General parameters 8 | ESSENTIAL_PROJECT="/fsimb/groups/imb-bioinfocf/projects/roukos/imb_roukos_2021_29_longo_breaktag_novogene/ngspipe2go" 9 | ESSENTIAL_SAMPLE_PREFIX="" 10 | ESSENTIAL_THREADS=16 11 | 12 | // Mapping parameters 13 | ESSENTIAL_BWA_REF="/fsimb/common/genomes/homo_sapiens/ucsc/hg38/canonical/index/bwa/hg38.fa" 14 | ESSENTIAL_PAIRED="yes" // paired end design 15 | ESSENTIAL_QUALITY=60 // min mapping quality of reads to be kept. Defaults to 60 16 | 17 | // further optional pipeline stages to include 18 | RUN_IN_PAIRED_END_MODE=(ESSENTIAL_PAIRED == "yes") 19 | 20 | // project folders 21 | PROJECT=ESSENTIAL_PROJECT 22 | LOGS=PROJECT + "/logs" 23 | MAPPED=PROJECT + "/mapped" 24 | QC=PROJECT + "/qc" 25 | RAWDATA=PROJECT + "/rawdata" 26 | REPORTS=PROJECT + "/reports" 27 | RESULTS=PROJECT + "/results" 28 | TMP=PROJECT + "/tmp" 29 | TRACKS=PROJECT + "/tracks" 30 | TARGETS=PROJECT + "/targets.txt" 31 | 32 | -------------------------------------------------------------------------------- /pipelines/breaktag/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ 16 | // fastqc : [ runenv: "lmod", version: "0.11.8" ] 17 | ] 18 | 19 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 20 | tools.putAll(tools_custom) // override with users custom versions/runenvs 21 | -------------------------------------------------------------------------------- /pipelines/scRNAseq/contrasts.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast 2 | Treatment1.vs.Ctrl (Treat1-Ctrl) 3 | Treatment2.vs.Ctrl (Treat2-Ctrl) 4 | -------------------------------------------------------------------------------- /pipelines/scRNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file group replicate 2 | S1 org1_1 organoid1 1 3 | S3 org3_1 organoid3 1 4 | -------------------------------------------------------------------------------- /pipelines/scRNAseq/tenXatac.pipeline.groovy: -------------------------------------------------------------------------------- 1 | PIPELINE="tenXatac" 2 | PIPELINE_VERSION="1.0" 3 | PIPELINE_ROOT="./NGSpipe2go/" // may need adjustment for some projects 4 | 5 | load PIPELINE_ROOT + "/pipelines/scRNAseq/essential.vars.groovy" 6 | load PIPELINE_ROOT + "/pipelines/scRNAseq/tools.groovy" 7 | load PIPELINE_ROOT + "/config/preambles.groovy" 8 | load PIPELINE_ROOT + "/config/bpipe.config.groovy" 9 | load PIPELINE_ROOT + "/config/validate_module_params.groovy" 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangeratac_count.header" 12 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangeratac_aggr.header" 13 | load PIPELINE_ROOT + "/modules/NGS/bamcoverage.header" 14 | load PIPELINE_ROOT + "/modules/NGS/bamindexer.header" 15 | load PIPELINE_ROOT + 
"/modules/NGS/fastqc.header" 16 | load PIPELINE_ROOT + "/modules/NGS/fastqscreen.header" 17 | load PIPELINE_ROOT + "/modules/NGS/markdups2.header" 18 | load PIPELINE_ROOT + "/modules/NGS/insertsize.header" 19 | load PIPELINE_ROOT + "/modules/NGS/cutadapt.header" 20 | load PIPELINE_ROOT + "/modules/miscellaneous/collect_tool_versions.header" 21 | load PIPELINE_ROOT + "/modules/scRNAseq/shinyreports.header" 22 | load PIPELINE_ROOT + "/modules/NGS/multiqc.header" 23 | 24 | 25 | dontrun = { println "didn't run $module" } 26 | 27 | Bpipe.run { 28 | "%.fastq.gz" * [ FastQC + FastqScreen + 29 | (RUN_CUTADAPT ? Cutadapt + FastQC.using(subdir:"trimmed") : dontrun.using(module:"Cutadapt")) ] + 30 | "%_S*_L*_R*_001.fastq.gz" * [ 31 | cellrangeratac_count + [ 32 | bamCoverage, 33 | InsertSize 34 | ] 35 | ] + 36 | cellrangeratac_aggr + 37 | (RUN_TRACKHUB ? trackhub_config + trackhub : dontrun.using(module:"trackhub")) + 38 | collectToolVersions + MultiQC + shinyReports 39 | } 40 | 41 | -------------------------------------------------------------------------------- /pipelines/scRNAseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ ] 16 | 17 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 18 | tools.putAll(tools_custom) // override with users custom versions/runenvs 19 | -------------------------------------------------------------------------------- /pipelines/smallRNAseq/contrasts.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix 2 | KO.vs.WT (KO-WT) ~group 3 | -------------------------------------------------------------------------------- /pipelines/smallRNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file group replicate 2 | WT_1 datasetID_WT_1 WT 1 3 | WT_2 datasetID_WT_2 WT 2 4 | KO_1 datasetID_KO_1 KO 1 5 | KO_2 datasetID_KO_2 KO 2 6 | -------------------------------------------------------------------------------- /pipelines/smallRNAseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 
14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ 16 | bowtie : [ runenv: "lmod", version: "1.3.1" ], 17 | cutadapt : [ runenv: "lmod", version: "4.0" ], 18 | kentutils : [ runenv: "lmod", version: "v385" ], 19 | subread : [ runenv: "lmod", version: "2.0.0" ] 20 | ] 21 | 22 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 23 | tools.putAll(tools_custom) // override with user's custom versions/runenvs 24 | -------------------------------------------------------------------------------- /pipelines/test/README.md: -------------------------------------------------------------------------------- 1 | # test pipeline 2 | Basically does nothing, but serves as a backbone to test the NGSpipe2go modular system 3 | -------------------------------------------------------------------------------- /pipelines/test/test.pipeline.groovy: -------------------------------------------------------------------------------- 1 | PIPELINE="test" 2 | PIPELINE_VERSION="1.0" 3 | PIPELINE_ROOT="./NGSpipe2go/" // may need adjustment for some projects 4 | 5 | // essential vars 6 | PROJECT="/fsimb/imbc_home/ssayolsp/tmp/test" 7 | LOGS=PROJECT + "/logs" 8 | OUT=PROJECT + "/out" 9 | TMP=PROJECT + "/tmp" 10 | 11 | // load external things 12 | load PIPELINE_ROOT + "/config/preambles.groovy" 13 | load PIPELINE_ROOT + "/config/bpipe.config.groovy" 14 | load PIPELINE_ROOT + "/config/validate_module_params.groovy" 15 | 16 | //MAIN PIPELINE TASK 17 | test = { 18 | output.dir = OUT 19 | def branch_outdir = new File(output.dir).getName() 20 | 21 | def PREAMBLE = get_preamble(module:"test", branch:branch, branch_outdir:branch_outdir) 22 | 23 | transform("*.in") to (".out") { 24 | exec """ 25 | >&2 echo "before preamble, logs go to the screen"; 26 | ${PREAMBLE} && 27 | >&2 echo "after preamble, logs go to the corresponding file"; 28 | cat $input > $output; 29 | """ 30 | } 31 | } 32 | 33 | Bpipe.run { "%.in" * [ test ] } 34 | 35 | -------------------------------------------------------------------------------- /resources/IMB_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/resources/IMB_logo.png -------------------------------------------------------------------------------- /resources/MARS-Seq_protocol_Step-by-Step_MML.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/resources/MARS-Seq_protocol_Step-by-Step_MML.pdf -------------------------------------------------------------------------------- /resources/NGSpipe2go_scheme.old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/resources/NGSpipe2go_scheme.old.png -------------------------------------------------------------------------------- /resources/NGSpipe2go_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/resources/NGSpipe2go_scheme.png -------------------------------------------------------------------------------- /testdata/ChIPseq/README.Rmd: -------------------------------------------------------------------------------- 1 | Obtained from the `chipseqDB`
Bioconductor [package](http://bioconductor.org/packages/release/workflows/html/chipseqDB.html) 2 | Five NF-YA (nuclear transcription factor Y subunit alpha) ChIP-seq libraries, with two biological replicates for murine terminal neurons, two replicates for embryonic stem cells and one input control. This uses single-end sequencing data from accession GSE25532 of the NCBI Gene Expression Omnibus. 3 | 4 | 5 | ```{r} 6 | if(!require(chipseqDBData)) { 7 | BiocManager::install("chipseqDBData") 8 | require(chipseqDBData) 9 | } 10 | 11 | nfyadata <- NFYAData() 12 | nfyadata$name <- gsub("[- ]", "_", gsub("[\\(\\)]", "", nfyadata$Description)) 13 | nfyadata$path <- sapply(nfyadata$Path, function(x) x$path) 14 | cat("mkdir ./rawdata", file="download_rawdata.sh", fill=TRUE, append=FALSE) 15 | cat("module load samtools", file="download_rawdata.sh", fill=TRUE, append=TRUE) 16 | Map(name=nfyadata$name, path=nfyadata$path, function(name, path) { 17 | cat("samtools bam2fq ", path, " | /fsimb/common/tools/pigz/2.4/pigz -p8 > ./rawdata/", name, ".fastq.gz\n", 18 | file="download_rawdata.sh", sep="", append=TRUE) 19 | }) 20 | ``` 21 | 22 | -------------------------------------------------------------------------------- /testdata/ChIPseq/contrasts_diffbind.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix sub_experiment 2 | NF_YA_ESCvsNF_YA_TN (NF_YA_ESC-NF_YA_TN) ~group 1 3 | -------------------------------------------------------------------------------- /testdata/ChIPseq/targets.txt: -------------------------------------------------------------------------------- 1 | IP IPname INPUT INPUTname group Replicate PeakCaller 2 | NF_YA_ESC_1 NF_YA_ESC_1 Input Input NF_YA_ESC 1 macs 3 | NF_YA_ESC_2 NF_YA_ESC_2 Input Input NF_YA_ESC 2 macs 4 | NF_YA_TN_1 NF_YA_TN_1 Input Input NF_YA_TN 1 macs 5 | NF_YA_TN_2 NF_YA_TN_2 Input Input NF_YA_TN 2 macs 6 | -------------------------------------------------------------------------------- /testdata/DNASeq/README.md: -------------------------------------------------------------------------------- 1 | Run this bash code to generate 5M 101bp PE reads for a reference (e.g. yeast), 2 replicates x 2 conditions. 2 | 3 | ```sh 4 | #!/bin/bash 5 | # 6 | set -euo pipefail 7 | 8 | export READ_LEN=101 9 | export FRAG_LEN=500 10 | export NUM_READS=5000000 11 | export ERR_RATE=0.001 12 | export MUT_RATE=0.0001 13 | export INDEL_RATE=0.15 14 | export INDEL_EXTEND_RATE=0.3 15 | 16 | export REF="/fsimb/groups/imb-bioinfocf/common-data/GATK_resources/gatk_bundle_hg38_v0/Homo_sapiens_assembly38.fasta" 17 | export RAWDATA="./rawdata" 18 | 19 | CORES=4 20 | 21 | function f { 22 | REF=$1 23 | BASE=$2 24 | REPL=${RAWDATA}/${BASE}_$3 25 | SEED=$3 26 | 27 | echo "replicate $REPL using ref $REF" 28 | wgsim \ 29 | -1${READ_LEN} \ 30 | -2${READ_LEN} \ 31 | -d${FRAG_LEN} \ 32 | -N${NUM_READS} \ 33 | -e${ERR_RATE} \ 34 | -r${MUT_RATE} \ 35 | -R${INDEL_RATE} \ 36 | -X${INDEL_EXTEND_RATE} \ 37 | -S${SEED} \ 38 | ${REF} ${REPL}.R1.fastq ${REPL}.R2.fastq | gzip > ${REPL}_sim.txt.gz 39 | 40 | gzip ${REPL}.R1.fastq ${REPL}.R2.fastq 41 | } 42 | export -f f 43 | 44 | # Generate 2 replicates for 2 conditions. Both from the same reference, thus they'll have no differences.
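# note: with --xapply, GNU parallel pairs the input sources element-wise and
# recycles the shorter ones, so the call below launches four jobs
# (control_1, treated_1, control_2, treated_2); the seed {3} doubles as the
# replicate number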
45 | # Use a modified reference for treated, if you want to introduce changes 46 | parallel --xapply -j $CORES "f {1} {2} {3}" ::: "$REF" ::: control treated ::: 1 1 2 2 47 | ``` 48 | -------------------------------------------------------------------------------- /testdata/RNAseq/README.Rmd: -------------------------------------------------------------------------------- 1 | Obtained from the `yeastRNASeq` Bioconductor [package](https://bioconductor.org/packages/release/data/experiment/html/yeastRNASeq.html) 2 | The subset of the data contained in this package is 3 | from a wild-type and a single mutant yeast. For each condition (mutant, 4 | wild-type) there are two lanes' worth of data, each lane containing a sample of 5 | 500,000 raw (unaligned) reads. 6 | 7 | ```{r} 8 | if(!require(yeastRNASeq)) { 9 | BiocManager::install("yeastRNASeq") 10 | require(yeastRNASeq) 11 | } 12 | 13 | files <- list.files(file.path(system.file(package = "yeastRNASeq"), "reads"), full.names=TRUE) 14 | files.fastq.gz <- files[grepl("\\.fastq\\.gz$", files)] 15 | dir.create("./rawdata", showWarnings=FALSE) 16 | sapply(files.fastq.gz, file.copy, to="./rawdata/") 17 | ``` 18 | 19 | In order to execute the pipeline on test data, you may follow the steps described [here](https://gitlab.rlp.net/imbforge/NGSpipe2go). However, you would have to create symlinks for the files located in the testdata folder, like 20 | 21 | ln -s NGSpipe2go/testdata/RNAseq/* . 22 | ln -s NGSpipe2go/pipelines/RNAseq/rnaseq.pipeline.groovy . 23 | 24 | Please make the following modifications in order for the test run to work: 25 | 26 | - Change the _ESSENTIAL_PROJECT_ variable in the _essential.vars.groovy_ file 27 | - Adjust the _rnaseq.pipeline.groovy_ file to reflect the location of the correct _essential.vars.groovy_ file 28 | - Adjust the _target_ variable in the _shinyreports.header_ file located in the NGSpipe2go/modules/RNAseq/ folder 29 | 30 | Now you should be able to run the pipeline successfully.
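For reference, a typical launch could then look like this (a sketch assuming Bpipe is available on your PATH; the exact invocation may differ for your setup):

```sh
# start the test run on the raw FASTQ files from within the project folder
bpipe run rnaseq.pipeline.groovy rawdata/*.fastq.gz
```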
31 | -------------------------------------------------------------------------------- /testdata/RNAseq/contrasts.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix 2 | mut.vs.wt (mut-wt) ~group 3 | -------------------------------------------------------------------------------- /testdata/RNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file group replicate 2 | mut_1 mut_1.readcounts.tsv mut 1 3 | mut_2 mut_2.readcounts.tsv mut 2 4 | wt_1 wt_1.readcounts.tsv wt 1 5 | wt_2 wt_2.readcounts.tsv wt 2 6 | -------------------------------------------------------------------------------- /testdata/RNAseqVariantCalling/essential.vars.groovy: -------------------------------------------------------------------------------- 1 | //Pipeline GATK RNA-seq variant calling 2 | ESSENTIAL_PROJECT="/tmp/ngspipe2go_rnaseqvariantcalling_test" 3 | ESSENTIAL_STAR_REF="/tmp/ngspipe2go_rnaseqvariantcalling_test/ref/" 4 | ESSENTIAL_GENOME_REF="/tmp/ngspipe2go_rnaseqvariantcalling_test/ref/ref.fa" 5 | ESSENTIAL_VCF_REF="/tmp/ngspipe2go_rnaseqvariantcalling_test/knowVariants.vcf" 6 | ESSENTIAL_READLENGTH=101 7 | ESSENTIAL_THREADS=4 8 | 9 | //global vars that will be reused in some global vars 10 | PROJECT=ESSENTIAL_PROJECT 11 | LOGS=PROJECT + "/logs" 12 | MAPPED=PROJECT + "/mapped" 13 | QC=PROJECT + "/qc" 14 | REPORTS=PROJECT + "/reports" 15 | RESULTS=PROJECT + "/results" 16 | TMP=PROJECT + "/tmp" 17 | TRACKS=MAPPED + "/tracks" 18 | 19 | // optional pipeline stages to include 20 | -------------------------------------------------------------------------------- /testdata/RNAseqVariantCalling/knowVariants.vcf.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/testdata/RNAseqVariantCalling/knowVariants.vcf.idx -------------------------------------------------------------------------------- /testdata/smallRNAseq_BCF_miRNA/miRNA.essential.vars.groovy: -------------------------------------------------------------------------------- 1 | //Pipeline generated with command line: ./imb-pip.pl --tasks-pip=1 --force 2 | //By: ssayolsp At: Fr 17 Okt 2014 17:12:41 CEST 3 | // 4 | // REMEMBER TO CHANGE THESE ESSENTIAL VARS!! 5 | // 6 | ESSENTIAL_PROJECT="./" 7 | ESSENTIAL_BOWTIE_REF="./ref/mmu" 8 | ESSENTIAL_GENOME_REF="./ref/mmu.fa" 9 | 10 | ESSENTIAL_GENESGTF="./ref/mmu.gtf" 11 | ESSENTIAL_RRNA_BOWTIE_REF="./ref/rrna" 12 | 13 | ESSENTIAL_SPECIES="Mouse" // necessary for miRDeep2; species name as used by UCSC 14 | ESSENTIAL_SAMPLE_PREFIX="" 15 | ESSENTIAL_FEATURETYPE="gene_biotype" //gencode uses gene_type; ensembl uses gene_biotype 16 | ESSENTIAL_PAIRED="no" // paired end design 17 | ESSENTIAL_STRANDED="yes" // strandness: no|yes|reverse 18 | ESSENTIAL_THREADS=4 // number of threads for parallel tasks 19 | 20 | ESSENTIAL_READLENGTH=51 // actual read length in original raw data (incl.
insert, UMIs, adapter) 21 | ESSENTIAL_MINADAPTEROVERLAP=5 // minimal overlap with adapter 22 | ESSENTIAL_MINREADLENGTH=15 // remaining read length plus UMIs (2x4) 23 | ESSENTIAL_UMI_LENGTH=4 // (2x4bp) 24 | ESSENTIAL_ADAPTER_SEQUENCE="TGGAATTCTCGGGTGCCAAGG" // needed for cutadapt adapter trimming 25 | 26 | // vars for mirDeep2 27 | ESSENTIAL_MATURE_MIRNA="./ref/mature.fa" 28 | ESSENTIAL_HAIRPIN_MIRNA="./ref/hairpin.fa" 29 | 30 | //global vars that will be reused in some global vars 31 | PROJECT=ESSENTIAL_PROJECT 32 | LOGS=PROJECT + "/logs" 33 | QC=PROJECT + "/qc" 34 | REPORTS=PROJECT + "/reports" 35 | RESULTS=PROJECT + "/results" 36 | PROCESSED=PROJECT + "/rawdata_processed" 37 | MAPPED=PROJECT + "/mapped" 38 | TMP=PROJECT + "/tmp" 39 | TRACKS=PROJECT + "/tracks" 40 | 41 | // optional pipeline stages to include 42 | 43 | -------------------------------------------------------------------------------- /testdata/smallRNAseq_BCF_smallrnaseq/README.md: -------------------------------------------------------------------------------- 1 | Get mouse microRNA experiment from [GSE57138](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE57138). 2 | Groenendyk J, Fan X, Peng Z, Ilnytskyy Y et al. Genome-wide analysis of thapsigargin-induced microRNAs and their targets in NIH3T3 cells. Genom Data 2014 Dec;2:325-7. PMID: 26484121 3 | 4 | ```bash 5 | ml sratoolkit bowtie 6 | 7 | # get rawdata 8 | mkdir rawdata 9 | parallel --xapply -j4 "fastq-dump --stdout {1} | gzip > rawdata/{2}.fastq.gz" ::: SRR1269676 SRR1269677 SRR1269678 SRR1269679 ::: control1 control2 thapsigargin1 thapsigargin2 10 | 11 | # get mouse reference & annotation 12 | mkdir ref 13 | wget -qO- ftp://ftp.ensembl.org/pub/release-98/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.chromosome.19.fa.gz | gzip -cd > ref/mmu_chr19.fa 14 | bowtie-build --threads 8 ref/mmu_chr19.fa ref/mmu_chr19 15 | wget -qO- ftp://ftp.ensembl.org/pub/release-98/gtf/mus_musculus/Mus_musculus.GRCm38.98.gtf.gz | gzip -cd | grep "^19" > ref/mmu_chr19.gtf 16 | 17 | # rrna reference 18 | Rscript -e 'biomaRt::exportFASTA(biomaRt::getBM(filters="biotype", values="rRNA", attributes=c("gene_exon_intron", "ensembl_gene_id"), mart=biomaRt::useEnsembl("ensembl", dataset="mmusculus_gene_ensembl")), "ref/rrna.fa")' 19 | bowtie-build --threads 8 ref/rrna.fa ref/rrna 20 | ``` 21 | -------------------------------------------------------------------------------- /testdata/smallRNAseq_BCF_smallrnaseq/smallrnaseq.essential.vars.groovy: -------------------------------------------------------------------------------- 1 | //Pipeline generated with command line: ./imb-pip.pl --tasks-pip=1 --force 2 | //By: ssayolsp At: Fr 17 Okt 2014 17:12:41 CEST 3 | // 4 | // REMEMBER TO CHANGE THESE ESSENTIAL VARS!! 5 | // 6 | ESSENTIAL_PROJECT="./" 7 | ESSENTIAL_BOWTIE_REF="./ref/mmu_chr19" 8 | ESSENTIAL_GENOME_REF="./ref/mmu_chr19.fa" 9 | 10 | ESSENTIAL_GENESGTF="./ref/mmu_chr19.gtf" 11 | ESSENTIAL_RRNA_BOWTIE_REF="./ref/rrna" 12 | 13 | ESSENTIAL_SPECIES="Mouse" // necessary for miRDeep2 and fastqscreen 14 | ESSENTIAL_SAMPLE_PREFIX="" 15 | ESSENTIAL_FEATURETYPE="gene_biotype" //gencode uses gene_type; ensembl uses gene_biotype 16 | ESSENTIAL_PAIRED="no" // paired end design 17 | ESSENTIAL_STRANDED="yes" // strandness: no|yes|reverse 18 | ESSENTIAL_THREADS=4 // number of threads for parallel tasks 19 | 20 | ESSENTIAL_READLENGTH=51 // actual read length in original raw data (incl.
insert, UMIs, adapter) 21 | ESSENTIAL_MINADAPTEROVERLAP=5 // minimal overlap with adapter 22 | ESSENTIAL_MINREADLENGTH=26 // remaining read length plus UMIs (2x4) 23 | ESSENTIAL_UMI_LENGTH=8 // (2x4bp) 24 | ESSENTIAL_ADAPTER_SEQUENCE="TGGAATTCTCGGGTGCCAAGG" // needed for cutadapt adapter trimming 25 | 26 | //global vars that will be reused in some global vars 27 | PROJECT=ESSENTIAL_PROJECT 28 | LOGS=PROJECT + "/logs" 29 | QC=PROJECT + "/qc" 30 | REPORTS=PROJECT + "/reports" 31 | RESULTS=PROJECT + "/results" 32 | PROCESSED=PROJECT + "/rawdata_processed" 33 | MAPPED=PROJECT + "/mapped" 34 | TMP=PROJECT + "/tmp" 35 | TRACKS=PROJECT + "/tracks" 36 | 37 | // optional pipeline stages to include 38 | 39 | -------------------------------------------------------------------------------- /testdata/tenx_ATAC/README.md: -------------------------------------------------------------------------------- 1 | Use the test data from the 10X Genomics `cellranger-atac` program. 2 | 3 | The tiny reference and sample FASTQ data can be found in the `external/atac_testrun_ref` and `external/cellranger_atac_tiny_fastq` subfolders of the installation folder (as of v2.0.0). 4 | 5 | The sample FASTQs should be placed in the `rawdata` subfolder of the test project folder, and the `essential.vars.groovy` file edited to point to the relevant reference and raw data folders. 6 | 7 | Note that the genes GTF file is compressed with cellranger-atac (`genes/genes.gtf.gz`). This does not matter, as no NGSpipe2go module uses it in this pipeline. 8 | 9 | -------------------------------------------------------------------------------- /testdata/tenx_GEX/README.md: -------------------------------------------------------------------------------- 1 | Use the test data from the 10X Genomics `cellranger` program. 2 | 3 | The tiny reference and sample FASTQ data can be found in the `external/cellranger_tiny_ref` and `external/cellranger_tiny_fastq` subfolders of the installation folder (as of v6.0.0). 4 | 5 | The sample FASTQs should be placed in the `rawdata` subfolder of the test project folder, and the `essential.vars.groovy` file edited to point to the relevant reference and raw data folders. 6 | 7 | Note that for the test data associated with cellranger (v6.0.0), the `ESSENTIAL_FEATURETYPE` variable should be set to the (ENSEMBL-associated) `gene_biotype`, not the (GENCODE-associated) `gene_type`. The full human and mouse 10X datasets, as well as the test datasets for cellranger-atac and cellranger-arc, use the GENCODE-style `gene_type`. 8 | 9 | -------------------------------------------------------------------------------- /testdata/tenx_multiome/README.md: -------------------------------------------------------------------------------- 1 | Use the test data from the 10X Genomics `cellranger-arc` program. 2 | 3 | The tiny reference and sample FASTQ data can be found in the `external/arc_testrun_files` subfolder of the installation folder (as of v2.0.0). 4 | 5 | The sample FASTQs should be placed in the `rawdata` subfolder of the test project folder, and the `essential.vars.groovy` file edited to point to the relevant reference and raw data folders. 6 | 7 | Note that the genes GTF file is compressed with cellranger-arc (`genes/genes.gtf.gz`). The `qualimap` tool requires uncompressed GTF files, so an uncompressed version needs to be available for it, and this is the one that should be set in the `essential.vars.groovy` file. The `geneBodyCov2` tool also uses the GTF file, but it can read the compressed version as well.
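If needed, the uncompressed copy for `qualimap` can be created next to the compressed one, e.g. (assuming the cellranger-arc reference layout described above):

```sh
# decompress the annotation once, keeping the original .gz in place
gzip -cd genes/genes.gtf.gz > genes/genes.gtf
```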
8 | 9 | -------------------------------------------------------------------------------- /tools/breaktag/umi_filtering.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import csv 5 | 6 | filename = sys.argv[1] 7 | 8 | with open(filename, 'r') as f: 9 | reader = csv.reader(f) 10 | data = list(reader) 11 | 12 | # GROUP TOGETHER CONSECUTIVE IDENTICAL UMIS, ASSUMING THEIR PROXIMITY 13 | # columns (inferred from usage below): 0=chrom, 1=start, 2=end, 3=strand, 4=UMI, 5=count 14 | row_old = data[0] 15 | data_aggregated_by_umi_identity = [row_old] 16 | for row in data[1:]: 17 | if (row_old[0]==row[0] and row_old[3]==row[3] and row_old[4]==row[4]): 18 | new_count, old_count = int(row[5]), int(row_old[5]) 19 | row[5] = str(new_count + old_count) # sum up the counts 20 | if new_count < old_count: 21 | row[1:3] = row_old[1:3] # keep the coordinates of the more abundant read 22 | del data_aggregated_by_umi_identity[-1] 23 | data_aggregated_by_umi_identity.append(row) 24 | row_old = row 25 | 26 | # GROUP TOGETHER CLOSE SPATIAL CONSECUTIVE READS WHOSE UMI DIFFERS AT MOST BY 2 MISMATCHES 27 | row_old = data_aggregated_by_umi_identity[0] 28 | data_aggregated_by_umi_similarity = [row_old] 29 | space_gap = 30 30 | mm_gap = 2 31 | for row in data_aggregated_by_umi_identity[1:]: 32 | s1 = row_old[4] 33 | s2 = row[4] 34 | numb_mismatches = sum(c1!=c2 for c1,c2 in zip(s1,s2)) 35 | dist = abs(int(row[1])-int(row_old[1])) 36 | if (row_old[0]==row[0] and dist<=space_gap and row_old[3]==row[3] and numb_mismatches<=mm_gap): 37 | new_count, old_count = int(row[5]), int(row_old[5]) 38 | row[5] = str(new_count + old_count) # sum up the counts 39 | if new_count < old_count: 40 | row[1:3] = row_old[1:3] # keep coordinates and UMI of the more abundant read 41 | row[4] = row_old[4] 42 | del data_aggregated_by_umi_similarity[-1] 43 | data_aggregated_by_umi_similarity.append(row) 44 | row_old = row 45 | 46 | for item in data_aggregated_by_umi_similarity: 47 | print('\t'.join(item)) 48 | -------------------------------------------------------------------------------- /tools/collectBpipeLogs/collectBpipeLogs.sh: -------------------------------------------------------------------------------- 1 | PROJECT=$1 2 | LOGS=$2 3 | 4 | # iterate through .bpipe outputs to pick up the run IDs of all finished tasks 5 | # copy all log files of the finished tasks out of the commandtmp folder 6 | for f in ${PROJECT}/.bpipe/outputs/*; 7 | do 8 | echo $f 9 | F=$(basename $f) 10 | #echo "FILEname " $F 11 | JOB=$(echo $F | cut -d. -f1) 12 | #echo "JOB " ${JOB} 13 | ID=$(grep -E "^commandId" $f | cut -d= -f2) 14 | #echo "ID " $ID 15 | FILE=$(grep -E "^outputFile" $f|cut -d= -f2) 16 | FILE=$(basename ${FILE}) 17 | echo "JOB: ${JOB}, ID: ${ID}, FILE: ${FILE}, DIR: ${PROJECT}" 18 | if [ ! -d "${LOGS}/${JOB}" ]; 19 | then 20 | echo "mkdir ${LOGS}/${JOB}" 21 | mkdir -p ${LOGS}/${JOB} 22 | fi 23 | 24 | if [ -e ${PROJECT}/.bpipe/commandtmp/${ID}/${ID}.err ]; 25 | then 26 | echo "${PROJECT}/.bpipe/commandtmp/${ID}/${ID}.err --> ${LOGS}/${JOB}/${FILE}.log" 27 | cp -v ${PROJECT}/.bpipe/commandtmp/${ID}/${ID}.err ${LOGS}/${JOB}/${FILE}.log 28 | # cp -v ${PROJECT}/.bpipe/commandtmp/${ID}/${ID}.out ${LOGS}/${JOB}/${FILE}.out 29 | fi 30 | done 31 | -------------------------------------------------------------------------------- /tools/dedup/remove_duplicates_with_stats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | input=${1} 4 | output=${input/.highQ.fastq.gz/.deduped_barcoded.fastq.gz} 5 | highQ=${input%.highQ.fastq.gz}".highQ" 6 | unique=${input%.highQ.fastq.gz}".unique" 7 | 8 | zcat ${input} | paste -d, - - - - | tee >(awk -v var="$highQ" 'END {print NR,var}' >> dedup.stats.txt) | sort -u -t, -k2,2 | tee >(awk -v var="$unique" 'END {print NR,var}' >> dedup.stats.txt) | tr ',' '\n' | gzip > ${output} 9 | -------------------------------------------------------------------------------- /tools/reports/shiny_smallrnaseq_reporting_tool/README: -------------------------------------------------------------------------------- 1 | 2 | The analysis of mature miRNA needs a separate report file, since for mature miRNAs 3 | a different gtf (gff3) file is used, which requires a different treatment from the 4 | other analyses. 5 | 6 | --------------------------------------------------------------------------------