├── LICENSE ├── README.md ├── config ├── bpipe.config.groovy ├── preambles.groovy ├── tools.groovy └── validate_module_params.groovy ├── modules ├── ChIPseq │ ├── GREAT.groovy │ ├── GREAT.header │ ├── blacklist_filter.groovy │ ├── blacklist_filter.header │ ├── bowtie1.groovy │ ├── bowtie1.header │ ├── bowtie2.groovy │ ├── bowtie2.header │ ├── diffbind3.groovy │ ├── diffbind3.header │ ├── filbowtie2unique.groovy │ ├── filbowtie2unique.header │ ├── ipstrength.groovy │ ├── ipstrength.header │ ├── macs2.groovy │ ├── macs2.header │ ├── make_greylist.groovy │ ├── make_greylist.header │ ├── normbigwig.groovy │ ├── normbigwig.header │ ├── pbc.groovy │ ├── pbc.header │ ├── peak_annotation.groovy │ ├── peak_annotation.header │ ├── phantompeak.groovy │ ├── phantompeak.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── upsetPlot.groovy │ └── upsetPlot.header ├── DNAseq │ ├── bwa.groovy │ ├── bwa.header │ ├── collectVariantCallingMetrics.groovy │ ├── collectVariantCallingMetrics.header │ ├── gatherBQSRReports.groovy │ ├── gatherBQSRReports.header │ ├── genomicsDBImport.groovy │ ├── genomicsDBImport.header │ ├── genotypeGVCFs.groovy │ ├── genotypeGVCFs.header │ ├── realignment.groovy │ ├── realignment.header │ ├── recalibration.groovy │ ├── recalibration.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── snpEff.groovy │ ├── snpEff.header │ ├── validateVariants.groovy │ ├── validateVariants.header │ ├── variantFiltration.groovy │ ├── variantFiltration.header │ ├── variant_score_recalibration.groovy │ ├── variant_score_recalibration.header │ ├── variantcallHC.groovy │ ├── variantcallHC.header │ ├── variantcallUG.groovy │ ├── variantcallUG.header │ ├── varianteval.groovy │ ├── varianteval.header │ ├── variantfuseHC.groovy │ └── variantfuseHC.header ├── NGS │ ├── README.md │ ├── bam2bw.groovy │ ├── bam2bw.header │ ├── bamcoverage.groovy │ ├── bamcoverage.header │ ├── bamindexer.groovy │ ├── bamindexer.header │ ├── bamqc.groovy │ ├── bamqc.header │ ├── cutadapt.groovy │ ├── cutadapt.header │ ├── downsamplebam.groovy │ ├── downsamplebam.header │ ├── downsamplefastqPE.groovy │ ├── downsamplefastqPE.header │ ├── downsamplefastqSE.groovy │ ├── downsamplefastqSE.header │ ├── extend.groovy │ ├── extend.header │ ├── fastqc.groovy │ ├── fastqc.header │ ├── fastqscreen.groovy │ ├── fastqscreen.header │ ├── filterchromosomes.groovy │ ├── filterchromosomes.header │ ├── insertsize.groovy │ ├── insertsize.header │ ├── markdups.groovy │ ├── markdups.header │ ├── markdups2.groovy │ ├── markdups2.header │ ├── mergebam.groovy │ ├── mergebam.header │ ├── multiqc.groovy │ ├── multiqc.header │ ├── rmdups.groovy │ ├── rmdups.header │ ├── samtoolscov.groovy │ ├── samtoolscov.header │ ├── strandSpecificBW.groovy │ ├── strandSpecificBW.header │ ├── trackhub.groovy │ ├── trackhub.header │ ├── trackhub_config.groovy │ └── trackhub_config.header ├── RNAseq │ ├── GO_Enrichment.groovy │ ├── GO_Enrichment.header │ ├── deseq2.groovy │ ├── deseq2.header │ ├── deseq2_mm.groovy │ ├── deseq2_mm.header │ ├── dupradar.groovy │ ├── dupradar.header │ ├── filter2htseq.groovy │ ├── filter2htseq.header │ ├── genebodycov.groovy │ ├── genebodycov.header │ ├── genebodycov2.groovy │ ├── genebodycov2.header │ ├── inferexperiment.groovy │ ├── inferexperiment.header │ ├── prermats.groovy │ ├── prermats.header │ ├── qualimap.groovy │ ├── qualimap.header │ ├── rmats.groovy │ ├── rmats.header │ ├── rnaseqc.groovy │ ├── rnaseqc.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── star.groovy │ ├── star.header │ ├── 
starfusion.groovy │ ├── starfusion.header │ ├── stringtie.groovy │ ├── stringtie.header │ ├── subread.groovy │ ├── subread.header │ ├── subread2rnatypes.groovy │ ├── subread2rnatypes.header │ ├── tpm.groovy │ └── tpm.header ├── RNAseqVariantCalling │ ├── add_read_group.groovy │ ├── add_read_group.header │ ├── base_recalibration.groovy │ ├── base_recalibration.header │ ├── create_star_index_sjdb.groovy │ ├── create_star_index_sjdb.header │ ├── mark_dups.groovy │ ├── mark_dups.header │ ├── merge_SJ_tab.groovy │ ├── merge_SJ_tab.header │ ├── splitNcigar.groovy │ ├── splitNcigar.header │ ├── star1pass.groovy │ ├── star1pass.header │ ├── star2pass.groovy │ ├── star2pass.header │ ├── variantCall_HC.groovy │ ├── variantCall_HC.header │ ├── variant_filtration.groovy │ └── variant_filtration.header ├── breaktag │ ├── bwa.groovy │ ├── bwa.header │ ├── collect_stats.groovy │ ├── collect_stats.header │ ├── count_breaks.groovy │ ├── count_breaks.header │ ├── count_breaks_strandless.groovy │ ├── count_breaks_strandless.header │ ├── pattern_filtering.groovy │ └── pattern_filtering.header ├── miscellaneous │ ├── collect_tool_versions.groovy │ └── collect_tool_versions.header ├── scRNAseq │ ├── CRmotifCounts.groovy │ ├── CRmotifCounts.header │ ├── CTannoMarker.groovy │ ├── CTannoMarker.header │ ├── CTannoSeurat.groovy │ ├── CTannoSeurat.header │ ├── DNAaccess.groovy │ ├── DNAaccess.header │ ├── SCTransform.groovy │ ├── SCTransform.header │ ├── addumibarcodetofastq.groovy │ ├── addumibarcodetofastq.header │ ├── assignSouporcellCluster.groovy │ ├── assignSouporcellCluster.header │ ├── cellranger_aggr.groovy │ ├── cellranger_aggr.header │ ├── cellranger_count.groovy │ ├── cellranger_count.header │ ├── cellrangerarc_aggr.groovy │ ├── cellrangerarc_aggr.header │ ├── cellrangerarc_count.groovy │ ├── cellrangerarc_count.header │ ├── cellrangeratac_aggr.groovy │ ├── cellrangeratac_aggr.header │ ├── cellrangeratac_count.groovy │ ├── cellrangeratac_count.header │ ├── demux_gt.groovy │ ├── demux_gt.header │ ├── demux_hto.groovy │ ├── demux_hto.header │ ├── diffExprSeurat.groovy │ ├── diffExprSeurat.header │ ├── diffPeaks.groovy │ ├── diffPeaks.header │ ├── grn.groovy │ ├── grn.header │ ├── motifActivity.groovy │ ├── motifActivity.header │ ├── motifEnrich.groovy │ ├── motifEnrich.header │ ├── motifFootprinting.groovy │ ├── motifFootprinting.header │ ├── peaks2genes.groovy │ ├── peaks2genes.header │ ├── sc_filter.groovy │ ├── sc_filter.header │ ├── sc_integrateATAC.groovy │ ├── sc_integrateATAC.header │ ├── sc_integrateRNA.groovy │ ├── sc_integrateRNA.header │ ├── sc_qc.groovy │ ├── sc_qc.header │ ├── sc_readAggrData.groovy │ ├── sc_readAggrData.header │ ├── sc_readIndivSamplesAndMerge.groovy │ ├── sc_readIndivSamplesAndMerge.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── splitpipe_all.groovy │ ├── splitpipe_all.header │ ├── splitpipe_comb.groovy │ ├── splitpipe_comb.header │ ├── subread.groovy │ ├── subread.header │ ├── umicount.groovy │ ├── umicount.header │ ├── umidedup.groovy │ ├── umidedup.header │ ├── wnn.groovy │ └── wnn.header └── smallRNAseq │ ├── bowtie1.groovy │ ├── bowtie1.header │ ├── dedup.groovy │ ├── dedup.header │ ├── deseq2.groovy │ ├── deseq2.header │ ├── deseq2_mirnamature.groovy │ ├── deseq2_mirnamature.header │ ├── fastq_quality_filter.groovy │ ├── fastq_quality_filter.header │ ├── filter2htseq.groovy │ ├── filter2htseq.header │ ├── filter_smallrna_counts.groovy │ ├── filter_smallrna_counts.header │ ├── mirDeep2.groovy │ ├── mirDeep2.header │ ├── mirDeep2_mapper.groovy │ ├── 
mirDeep2_mapper.header │ ├── shinyreports.groovy │ ├── shinyreports.header │ ├── subread.groovy │ ├── subread.header │ ├── subread2rnatypes.header │ ├── subread_mirnamature.groovy │ ├── subread_mirnamature.header │ ├── trim_umis.groovy │ └── trim_umis.header ├── pipelines ├── ChIPseq │ ├── README.md │ ├── chipseq.pipeline.groovy │ ├── contrasts_diffbind.txt │ ├── essential.vars.groovy │ ├── targets.txt │ └── tools.groovy ├── DNAseq │ ├── README.md │ ├── dnaseq.pipeline.groovy │ ├── essential.vars.groovy │ ├── targets.txt │ └── tools.groovy ├── RNAseq │ ├── README.md │ ├── contrasts.txt │ ├── essential.vars.groovy │ ├── rnaseq.pipeline.groovy │ ├── targets.txt │ └── tools.groovy ├── RNAseqVariantCalling │ ├── README.md │ ├── essential.vars.groovy │ ├── rnaseq_variant_calling.pipeline.groovy │ └── tools.groovy ├── breaktag │ ├── README.md │ ├── breaktag.pipeline.groovy │ ├── essential.vars.groovy │ ├── targets.txt │ └── tools.groovy ├── scRNAseq │ ├── README.md │ ├── combiBarcoding.pipeline.groovy │ ├── contrasts.txt │ ├── essential.vars.groovy │ ├── marsseq.pipeline.groovy │ ├── smartseq.pipeline.groovy │ ├── targets.txt │ ├── tenX.pipeline.groovy │ ├── tenXatac.pipeline.groovy │ ├── tenXmultiome.pipeline.groovy │ └── tools.groovy ├── smallRNAseq │ ├── README.md │ ├── contrasts.txt │ ├── smallrnaseq.essential.vars.groovy │ ├── smallrnaseq.pipeline.groovy │ ├── targets.txt │ └── tools.groovy └── test │ ├── README.md │ └── test.pipeline.groovy ├── resources ├── IMB_logo.png ├── MARS-Seq_protocol_Step-by-Step_MML.pdf ├── NGSpipe2go_scheme.old.png └── NGSpipe2go_scheme.png ├── testdata ├── ChIPseq │ ├── README.Rmd │ ├── contrasts_diffbind.txt │ ├── essential.vars.groovy │ └── targets.txt ├── DNASeq │ ├── README.md │ └── essential.vars.groovy ├── RNAseq │ ├── README.Rmd │ ├── contrasts.txt │ ├── essential.vars.groovy │ └── targets.txt ├── RNAseqVariantCalling │ ├── README.md │ ├── essential.vars.groovy │ ├── knowVariants.vcf │ └── knowVariants.vcf.idx ├── scRNAseq_smartseq2 │ ├── README.md │ ├── essential.vars.groovy │ └── targets.txt ├── smallRNAseq_BCF_miRNA │ ├── README.md │ └── miRNA.essential.vars.groovy ├── smallRNAseq_BCF_smallrnaseq │ ├── README.md │ └── smallrnaseq.essential.vars.groovy ├── tenx_ATAC │ ├── README.md │ └── essential.vars.groovy ├── tenx_GEX │ ├── README.md │ └── essential.vars.groovy └── tenx_multiome │ ├── README.md │ └── essential.vars.groovy └── tools ├── BlackList_Filter ├── BlackList_Filter.R └── make_greylist.R ├── CTanno ├── CTannoMarker.R ├── CTannoSeurat.R └── renv.lock ├── DE_DESeq2 ├── DE_DESeq2.R └── DE_DESeq2_miRNAmature.R ├── DE_edgeR ├── DE_edgeR.R └── DE_edgeR.mmatrix.R ├── ENCODEqc ├── IPstrength.R ├── PBC.R └── phantompeak.R ├── GO_Enrichment ├── GO_Enrichment.R └── GREAT.R ├── Peak_Annotation └── Peak_Annotation.R ├── TPMs └── TPMs.R ├── breaktag ├── pattern_filtering.pl └── umi_filtering.py ├── collectBpipeLogs └── collectBpipeLogs.sh ├── dedup ├── PCRDuplicatesPlot.R └── remove_duplicates_with_stats.sh ├── demux └── demux_hto.R ├── diffbind └── diffbind3.R ├── dupRadar └── dupRadar.R ├── geneBodyCov └── geneBodyCov.R ├── mapping_stats └── mapping_stats_bowtie1.R ├── maser ├── createMaserPlots.R ├── plotTranscriptsMod.R └── volcanoMod.R ├── piRNA ├── piRNABaseTerminalBases.py ├── piRNABaseTerminalBasesPlot.R ├── piRNA_quantification_summarization.R ├── ping-pong_signature.py └── plotPP.R ├── reports ├── shiny_chipseq_reporting_tool │ ├── ChIP.shinyrep.helpers.R │ ├── ChIPreport.Rmd │ └── styles.css ├── shiny_dnaseq_reporting_tool │ ├── styles.css │ 
├── variant.shinyrep.helpers.R │ └── variantreport.Rmd ├── shiny_rnaseq_reporting_tool │ ├── DE.shinyrep.helpers.R │ ├── DEreport.Rmd │ └── styles.css ├── shiny_scrnaseq_reporting_tool │ ├── Read_individual_10X_sample_data.Rmd │ ├── app.R │ ├── sc.report.Rmd │ ├── sc.shinyrep.helpers.R │ ├── scatac.report.Rmd │ ├── scmultiome.report.Rmd │ ├── server.R │ ├── styles.css │ └── ui.R └── shiny_smallrnaseq_reporting_tool │ ├── README │ ├── smallRNA.shinyrep.helpers.R │ ├── smallRNAreport.Rmd │ ├── smallRNAreport.miRNAmature.Rmd │ ├── smallRNAreport.type.Rmd │ └── styles.css ├── sc_DNAaccess ├── DNAaccess.R ├── diffPeaks.R └── renv.lock ├── sc_diffExprSeurat ├── diffExprSeurat.R └── renv.lock ├── sc_grn ├── grn.R └── renv.lock ├── sc_integrate ├── sc_integrateATAC.R └── sc_integrateRNA.R ├── sc_motifs ├── CRmotifCounts.R ├── motifActivity.R ├── motifEnrich.R ├── motifFootprinting.R └── renv.lock ├── sc_norm ├── SCTransform.R └── renv.lock ├── sc_peaks2genes ├── peaks2genes.R └── renv.lock ├── sc_qc ├── renv.lock ├── sc_filter_multiome.R └── sc_qc_multiome.R ├── sc_readData ├── renv.lock ├── sc_readAggrData.R └── sc_readIndivSamplesAndMerge.R ├── sc_wnn ├── renv.lock └── wnn.R ├── smallRNA ├── DEG_analysis_DESeq2.Rmd ├── DEG_analysis_DESeq2.helpers.R ├── DEG_analysis_DESeq2.transposons.Rmd ├── DEG_analysis_Transposons_DESeq2.Rmd ├── DEG_analysis_edgeR.R ├── SummarizeLibrariesRPMs.R ├── SummarizeknownTargetsRPMs.R ├── countNonStructuralReads.R ├── countNonStructuralReadsHTseq-count.R ├── filterSmallRNAclasses.py ├── plotReadLength.R ├── plot_sensor_coverage.R ├── sequence_bias_plot.R ├── smallRNA_classes_plot.R └── summarizeNucleotideByReadLenght.py ├── smallRNA_BCF └── extract_smallRNA.R ├── tSNE_exprs └── app.R ├── trackhub ├── Configure_Trackhub.R └── Make_Trackhub.R └── upsetPlot └── upsetPlot.R /config/validate_module_params.groovy: -------------------------------------------------------------------------------- 1 | class Ngspipe2goWrongTypeException extends Exception { 2 | Ngspipe2goWrongTypeException(String message) { 3 | super(message) 4 | } 5 | } 6 | 7 | Boolean validate_schema(Class Params, Map params) { 8 | try { 9 | // validate parameter types against the schema 10 | p = Params.newInstance(params) 11 | params.each{ k, v -> 12 | if(p[k].getClass() != params[k].getClass()) { 13 | String message = "param ${k} is ${params[k].getClass()} instead of ${p[k].getClass()}" 14 | throw new Ngspipe2goWrongTypeException(message) 15 | } 16 | } 17 | // validate presence of mandatory parameters 18 | assert true == !!p 19 | } catch(Ngspipe2goWrongTypeException e) { 20 | throw new RuntimeException("invalid parameter types\n${e}") 21 | } catch(AssertionError e) { 22 | throw new RuntimeException("mandatory arguments missing or invalid") 23 | } catch(Exception e) { 24 | throw new RuntimeException("invalid parameter types\n${e}") 25 | } 26 | return true 27 | } 28 | -------------------------------------------------------------------------------- /modules/ChIPseq/GREAT.groovy: -------------------------------------------------------------------------------- 1 | GREAT = { 2 | doc title: "GREAT", 3 | desc: "Genomic Regions Enrichment Analysis", 4 | constraints: "", 5 | bpipe_version:"", 6 | author:"Giuseppe Petrosino" 7 | 8 | var subdir : "" 9 | output.dir = GREAT_vars.outdir + "/$subdir" 10 | 11 | def GREAT_FLAGS = 12 | (GREAT_vars.files ? " peakData=" + GREAT_vars.files + "/$subdir" : "") + 13 | (GREAT_vars.targets ? " targets=" + GREAT_vars.targets : "") + 14 | (GREAT_vars.outdir ? 
" out=" + GREAT_vars.outdir + "/$subdir" : "") + 15 | (GREAT_vars.padj ? " padj=" + GREAT_vars.padj : "") + 16 | (GREAT_vars.nterms ? " nterms=" + GREAT_vars.nterms : "") + 17 | (GREAT_vars.db ? " db=" + GREAT_vars.db : "") + 18 | (GREAT_vars.upstream ? " adv_upstream=" + GREAT_vars.upstream : "") + 19 | (GREAT_vars.downstream ? " adv_downstream=" + GREAT_vars.downstream : "") + 20 | (GREAT_vars.extra ? " " + GREAT_vars.extra : "") 21 | 22 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 23 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 24 | 25 | produce("GREAT.RData") { 26 | exec """ 27 | ${TOOL_ENV} && 28 | ${PREAMBLE} && 29 | 30 | Rscript ${PIPELINE_ROOT}/tools/GO_Enrichment/GREAT.R $GREAT_FLAGS 31 | ""","GREAT" 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /modules/ChIPseq/GREAT.header: -------------------------------------------------------------------------------- 1 | GREAT_vars=[ 2 | outdir : RESULTS + "/GREAT_analysis", 3 | files : RESULTS + "/macs2", 4 | targets : "targets.txt", // targets file describing the samples 5 | padj : "0.01", 6 | nterms : "5", 7 | db : ESSENTIAL_DB, 8 | upstream : "5", // 5 kb upstream of the TSS 9 | downstream: "1", // 1 kb downstream of the TSS 10 | extra : "" 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/ChIPseq/GREAT.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/ChIPseq/blacklist_filter.groovy: -------------------------------------------------------------------------------- 1 | blacklist_filter = { 2 | doc title: "blacklist_filter", 3 | desc: "Remove peaks overlapping blacklisted genomic regions", 4 | constraints: "", 5 | bpipe_version:"", 6 | author:"Giuseppe Petrosino, modified by Frank Ruehle" 7 | 8 | var subdir : "" 9 | var blacklist: blacklist_filter_vars.blacklist 10 | output.dir = blacklist_filter_vars.outdir + "/$subdir" 11 | 12 | println blacklist 13 | 14 | def BLACKLIST_FILTER_FLAGS = 15 | (blacklist_filter_vars.files ? " peakData=" + blacklist_filter_vars.files + "/$subdir" : "") + 16 | (blacklist ? " blacklistRegions=" + blacklist : "") + 17 | (blacklist_filter_vars.outdir ? " out=" + blacklist_filter_vars.outdir + "/$subdir" : "") + 18 | (blacklist_filter_vars.extra ? 
blacklist_filter_vars.extra : "") 19 | 20 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 21 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 22 | 23 | produce("BlackList_Filter.RData") { 24 | exec """ 25 | ${TOOL_ENV} && 26 | ${PREAMBLE} && 27 | 28 | Rscript ${PIPELINE_ROOT}/tools/BlackList_Filter/BlackList_Filter.R $BLACKLIST_FILTER_FLAGS; 29 | ""","blacklist_filter" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /modules/ChIPseq/blacklist_filter.header: -------------------------------------------------------------------------------- 1 | blacklist_filter_vars=[ 2 | outdir : RESULTS + "/macs2", 3 | blacklist: ESSENTIAL_BLACKLIST, 4 | files : RESULTS + "/macs2", 5 | extra : "" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/ChIPseq/blacklist_filter.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/ChIPseq/filbowtie2unique.header: -------------------------------------------------------------------------------- 1 | filbowtie2unique_vars=[ 2 | mapped : MAPPED, 3 | paired : RUN_IN_PAIRED_END_MODE, // run in se or pe mode 4 | samtools_mapq_pe : "10", // MAPQ for paired end data; >=3 should exclude "true multireads", multi mapped reads within the window of insert size 5 | samtools_mapq_se : "10", // MAPQ for single end data; 6 | samtools_threads : Integer.toString(ESSENTIAL_THREADS) 7 | ] 8 | 9 | dupremoval_vars=[ 10 | remove_pcr_dups : ESSENTIAL_DEDUPLICATION // added to remove PCR duplicates 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/ChIPseq/filbowtie2unique.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/ChIPseq/ipstrength.header: -------------------------------------------------------------------------------- 1 | ipstrength_vars=[ 2 | outdir : QC + "/ipstrength", 3 | targets : "targets.txt", // targets file describing the samples 4 | mapped : MAPPED, // where the bam files are stored 5 | bsgenome: ESSENTIAL_BSGENOME // a bioconductor BSgenome reference available in the R libPath() BSgenome.Hsapiens.UCSC.hg19 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/ChIPseq/ipstrength.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/ChIPseq/macs2.header: -------------------------------------------------------------------------------- 1 | macs2_vars=[ 2 | outdir : RESULTS + "/macs2", // output dir 3 | targets: "targets.txt", // targets file describing the samples 4 | gsize : ESSENTIAL_MACS2_GSIZE, // the mappable genome size 5 | minlen : ESSENTIAL_MIN_PEAKLENGTH, // MACS2 minimum peak length (default is fragment size). Could be increased if broad option is used. 
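// Illustrative sketch only, not part of the module: from the settings above, macs2.groovy
// assembles a MACS2 call conceptually similar to
//   macs2 callpeak -t IP.bam -c input.bam -g <gsize> --min-length <minlen> --keep-dup <dup> [--broad] [-f BAMPE]
// (file names and placeholders in angle brackets are hypothetical; the flag names follow the
// MACS2 command-line documentation, and the exact command is assembled in macs2.groovy)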
6 | broad : ESSENTIAL_MACS2_BROAD, // use broad setting for peak calling 7 | mapped : MAPPED, // where the bam files are stored 8 | paired : (ESSENTIAL_PAIRED == "yes"), // for PE data use fragments in peak calling 9 | extra : "--keep-dup " + ESSENTIAL_DUP // other parameters sent to macs2 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/ChIPseq/macs2.groovy" 13 | 14 | // expected parameter types 15 | class macs2_vars_schema { 16 | String outdir 17 | String targets 18 | String gsize 19 | String minlen 20 | Boolean broad 21 | String mapped 22 | Boolean paired 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && targets && mapped 28 | } 29 | } 30 | 31 | validate_schema(macs2_vars_schema, macs2_vars) 32 | -------------------------------------------------------------------------------- /modules/ChIPseq/make_greylist.header: -------------------------------------------------------------------------------- 1 | make_greylist_vars=[ 2 | outdir : RESULTS + "/greylist", 3 | karyoFile : ESSENTIAL_CHROMSIZES, // file with chromosome sizes 4 | targets : "targets.txt", // targets file. Check the bin directory for the format 5 | bams : MAPPED, // directory with the bam files 6 | peaks : RESULTS + "/macs2", // directory with peak caller output 7 | reps : "100", // The number of times to sample bins and estimate the parameters of the negative binomial distribution. 8 | sampleSize : "30000", // The number of bins to sample on each repetition. 9 | pThreshold : "0.99", // The p-value threshold for marking bins as “grey”. 10 | maxgap : "10000", // merge grey regions if distance below maxGap 11 | extra : "" 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/ChIPseq/make_greylist.groovy" 15 | 16 | -------------------------------------------------------------------------------- /modules/ChIPseq/normbigwig.header: -------------------------------------------------------------------------------- 1 | normbigwig_vars=[ 2 | outdir : TRACKS + "/input_normalised_cov", 3 | targets : "targets.txt", // targets file describing the samples 4 | extension_length: ESSENTIAL_FRAGLEN-ESSENTIAL_READLEN, //for paired end the pairs are automatically used. 
The extension length is only used if there are singletons 5 | mapped : MAPPED, 6 | threads : Integer.toString(ESSENTIAL_THREADS), 7 | extra : "--scaleFactorsMethod readCount --operation subtract " + "--extendReads " + Integer.toString(EXTENSION_LENGTH) + " --outFileFormat bedgraph" 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/ChIPseq/normbigwig.groovy" 11 | 12 | -------------------------------------------------------------------------------- /modules/ChIPseq/pbc.groovy: -------------------------------------------------------------------------------- 1 | pbc = { 2 | doc title: "PBC", 3 | desc: "PCR Bottleneck Coefficient", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | var subdir : "" 9 | output.dir = pbc_vars.outdir + "/$subdir" 10 | 11 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 12 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 13 | 14 | transform(".bam") to("_PBC.csv") { 15 | exec """ 16 | ${TOOL_ENV} && 17 | ${PREAMBLE} && 18 | 19 | Rscript ${PIPELINE_ROOT}/tools/ENCODEqc/PBC.R $input && mv ${input.prefix}_PBC.csv $output.dir 20 | ""","pbc" 21 | } 22 | 23 | forward input 24 | 25 | } 26 | -------------------------------------------------------------------------------- /modules/ChIPseq/pbc.header: -------------------------------------------------------------------------------- 1 | pbc_vars=[ 2 | outdir: QC + "/pbc" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/ChIPseq/pbc.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/ChIPseq/peak_annotation.header: -------------------------------------------------------------------------------- 1 | peak_annotation_vars=[ 2 | outdir : RESULTS + "/Peak_Annotation", 3 | files : RESULTS + "/macs2", 4 | transcript_type: "Bioconductor", 5 | transcript_db : ESSENTIAL_TXDB, // eg, TxDb.Mmusculus.UCSC.mm9.knownGene" 6 | orgdb : ESSENTIAL_ANNODB, // eg, org.Mm.eg.db 7 | regiontss : "3000", 8 | targets : "targets.txt", 9 | orderby : "group", // name of the column in targets.txt to order the libraries in plots 10 | extra : "" 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/ChIPseq/peak_annotation.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/ChIPseq/phantompeak.groovy: -------------------------------------------------------------------------------- 1 | phantompeak = { 2 | doc title: "Phantompeak QC plot", 3 | desc: "Phantompeak", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | var subdir : "" 9 | output.dir = phantompeak_vars.outdir + "/$subdir" 10 | 11 | def PHANTOMPEAK_FLAGS = 12 | (phantompeak_vars.minshift ? " " + phantompeak_vars.minshift : "") + 13 | (phantompeak_vars.maxshift ? " " + phantompeak_vars.maxshift : "") + 14 | (phantompeak_vars.binsize ? " " + phantompeak_vars.binsize : "") + 15 | (phantompeak_vars.readlen ? " " + phantompeak_vars.readlen : "") + 16 | (phantompeak_vars.threads ? " " + phantompeak_vars.threads : "") + 17 | (phantompeak_vars.extra ? 
" " + phantompeak_vars.extra : "") 18 | 19 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 20 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 21 | 22 | transform(".bam") to("_phantompeak.png") { 23 | exec """ 24 | ${TOOL_ENV} && 25 | ${PREAMBLE} && 26 | 27 | Rscript ${PIPELINE_ROOT}/tools/ENCODEqc/phantompeak.R $input \$(basename $input.prefix) $PHANTOMPEAK_FLAGS && 28 | 29 | mv \$(basename $input.prefix)_phantompeak.* $output.dir 30 | ""","phantompeak" 31 | } 32 | 33 | forward input 34 | } 35 | 36 | -------------------------------------------------------------------------------- /modules/ChIPseq/phantompeak.header: -------------------------------------------------------------------------------- 1 | phantompeak_vars=[ 2 | outdir : QC + "/phantompeak", 3 | threads : Integer.toString(ESSENTIAL_THREADS), // number of threads to use 4 | minshift: Integer.toString(-500), // left 'x' coordinate in plot 5 | maxshift: Integer.toString(1500), // right 'x' coordinate in plot 6 | binsize : Integer.toString(5), // stepsize for cc calculation 7 | readlen : Integer.toString(ESSENTIAL_READLEN), // read length 8 | extra : "" // extra parms to pass to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/ChIPseq/phantompeak.groovy" 12 | 13 | -------------------------------------------------------------------------------- /modules/ChIPseq/upsetPlot.groovy: -------------------------------------------------------------------------------- 1 | upsetPlot = { 2 | doc title: "upset plot", 3 | desc: "prepare combination matrix and UpSet Plot for peak data", 4 | constraints: "calculation of combination matrix may take hours for larger projects", 5 | bpipe_version:"tested with bpipe 0.9.9.8", 6 | author:"Frank Rühle" 7 | 8 | var subdir : "" 9 | output.dir = UPSET_vars.outdir + "/$subdir" 10 | 11 | def UPSET_FLAGS = 12 | (UPSET_vars.files ? " peakData=" + UPSET_vars.files + "/$subdir" : "") + 13 | (UPSET_vars.targets ? " targets=" + UPSET_vars.targets : "") + 14 | (UPSET_vars.outdir ? " out=" + UPSET_vars.outdir + "/$subdir" : "") + 15 | (UPSET_vars.mode ? " mode=" + UPSET_vars.mode : "") + 16 | (UPSET_vars.peakOverlapMode ? " peakOverlapMode=" + UPSET_vars.peakOverlapMode : "") + 17 | (UPSET_vars.setsize ? " setsize=" + UPSET_vars.setsize : "") + 18 | (UPSET_vars.addBarAnnotation ? " addBarAnnotation=" + UPSET_vars.addBarAnnotation : "") + 19 | (UPSET_vars.extra ? 
" " + UPSET_vars.extra : "") 20 | 21 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 22 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 23 | 24 | produce("upsetPlot.RData") { 25 | exec """ 26 | ${TOOL_ENV} && 27 | ${PREAMBLE} && 28 | 29 | Rscript ${PIPELINE_ROOT}/tools/upsetPlot/upsetPlot.R $UPSET_FLAGS; 30 | ""","upsetPlot" 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /modules/ChIPseq/upsetPlot.header: -------------------------------------------------------------------------------- 1 | UPSET_vars=[ 2 | outdir : RESULTS + "/upsetPlot", 3 | files : RESULTS + "/macs2", 4 | targets : "targets.txt", // targets file describing the samples 5 | mode : "intersect", 6 | peakOverlapMode : "peaknumber", 7 | setsize : "25", 8 | addBarAnnotation : false, 9 | extra : "" 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/ChIPseq/upsetPlot.groovy" 13 | 14 | -------------------------------------------------------------------------------- /modules/DNAseq/bwa.header: -------------------------------------------------------------------------------- 1 | BWA_vars=[ 2 | outdir : MAPPED, 3 | ref : ESSENTIAL_BWA_REF, 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | 6 | // additional settings 7 | samtools_threads: Integer.toString(ESSENTIAL_THREADS), 8 | extra : "" // extra parms sent to the tool 9 | ] 10 | 11 | 12 | load PIPELINE_ROOT + "/modules/DNAseq/bwa.groovy" 13 | 14 | // expected parameter types 15 | class BWA_vars_schema { 16 | String outdir 17 | String ref 18 | String threads 19 | String samtools_threads 20 | String extra 21 | 22 | // check for the presence of mandatory params 23 | boolean asBoolean() { 24 | outdir && ref 25 | } 26 | } 27 | 28 | validate_schema(BWA_vars_schema, BWA_vars) 29 | -------------------------------------------------------------------------------- /modules/DNAseq/collectVariantCallingMetrics.header: -------------------------------------------------------------------------------- 1 | CollectVariantCallingMetrics_vars=[ 2 | outdir : QC + "/GATK_VariantCallingMetrics", 3 | java_flags : "-Xmx20000m", 4 | bwa_ref : ESSENTIAL_BWA_REF, 5 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/collectVariantCallingMetrics.groovy" 10 | 11 | // metrics explained here: https://broadinstitute.github.io/picard/picard-metric-definitions.html 12 | -------------------------------------------------------------------------------- /modules/DNAseq/gatherBQSRReports.groovy: -------------------------------------------------------------------------------- 1 | GatherBQSRReports = { 2 | doc title: "GATK GatherBQSRReports", 3 | desc: "This tool gathers scattered BQSR recalibration reports into a single file.", 4 | constraints: "", 5 | bpipe_version: "tested with 0.9.9.8.slurm", 6 | author: "Frank Rühle" 7 | 8 | output.dir = GatherBQSRReports_vars.outdir 9 | 10 | def GatherBQSRReports_vars_FLAGS = 11 | (GatherBQSRReports_vars.extra ? 
" " + GatherBQSRReports_vars.extra : "" ) 12 | 13 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 14 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix.prefix).getName()) 16 | 17 | produce("gatheredBQSR.report") { 18 | 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | gatk --java-options "${GatherBQSRReports_vars.java_flags}" GatherBQSRReports $GatherBQSRReports_vars_FLAGS --tmp-dir \${TMP} -O $output \$(for f in \$(ls ${output.dir}/*.table);do echo " -I " "\$f"; done) 24 | ""","GatherBQSRReports" 25 | } 26 | forward input 27 | } 28 | 29 | 30 | -------------------------------------------------------------------------------- /modules/DNAseq/gatherBQSRReports.header: -------------------------------------------------------------------------------- 1 | GatherBQSRReports_vars=[ 2 | outdir : BaseRecalibration_vars.statsdir, 3 | java_flags : "-Xmx20000m", 4 | extra : "" 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/DNAseq/gatherBQSRReports.groovy" 8 | 9 | 10 | -------------------------------------------------------------------------------- /modules/DNAseq/genomicsDBImport.header: -------------------------------------------------------------------------------- 1 | GenomicsDBImport_vars=[ 2 | outdir : RESULTS, 3 | workspace_name: "GenomicsDBImport", 4 | java_flags : "-Xmx4g -Xms4g", 5 | call_region : ESSENTIAL_CALL_REGION, 6 | sample_map : "sample.map", // will be generated from targets file 7 | extra : "" 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/DNAseq/genomicsDBImport.groovy" 11 | 12 | 13 | -------------------------------------------------------------------------------- /modules/DNAseq/genotypeGVCFs.groovy: -------------------------------------------------------------------------------- 1 | GenotypeGVCFs = { 2 | doc title: "GATK GenotypeGVCFs", 3 | desc: "Perform joint genotyping on single input file containing one or more samples pre-called with HaplotypeCaller", 4 | constraints: "input sample file must possess genotype likelihoods produced by HaplotypeCaller with '-ERC GVCF' or '-ERC BP_RESOLUTION'", 5 | bpipe_version: "tested with bpipe 0.9.9.8.slurm", 6 | author: "Frank Rühle" 7 | 8 | output.dir = GenotypeGVCFs_vars.outdir 9 | 10 | def GenotypeGVCFs_FLAGS = 11 | (GenotypeGVCFs_vars.bwa_ref ? " -R " + GenotypeGVCFs_vars.bwa_ref : "" ) + 12 | (GenotypeGVCFs_vars.extra ? 
" " + GenotypeGVCFs_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 15 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1).getName()) 17 | 18 | println "Used workspace from GenomicsDBImport: " + input.dir 19 | 20 | produce(GenotypeGVCFs_vars.vcf_name) { 21 | exec """ 22 | ${TOOL_ENV} && 23 | ${PREAMBLE} && 24 | 25 | gatk --java-options "${GenotypeGVCFs_vars.java_flags}" GenotypeGVCFs $GenotypeGVCFs_FLAGS --tmp-dir \${TMP} -V gendb://$input.dir -O $output 26 | 27 | ""","GenotypeGVCFs" 28 | } 29 | } 30 | 31 | 32 | -------------------------------------------------------------------------------- /modules/DNAseq/genotypeGVCFs.header: -------------------------------------------------------------------------------- 1 | GenotypeGVCFs_vars=[ 2 | outdir : RESULTS + "/GenotypeGVCFs", 3 | vcf_name :"joinedsamples.vcf.gz", 4 | java_flags : "-Xmx4g", 5 | bwa_ref : ESSENTIAL_BWA_REF, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/genotypeGVCFs.groovy" 10 | 11 | 12 | -------------------------------------------------------------------------------- /modules/DNAseq/realignment.header: -------------------------------------------------------------------------------- 1 | IndelRealignment_vars=[ 2 | outdir : MAPPED, 3 | java_flags : "-Xmx5000m", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | mills_variants: ESSENTIAL_MILLS_VARIANTS, 6 | bwa_ref : ESSENTIAL_BWA_REF 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/realignment.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/DNAseq/recalibration.header: -------------------------------------------------------------------------------- 1 | BaseRecalibration_vars=[ 2 | outdir : MAPPED, 3 | statsdir : QC + "/BQSR", 4 | bwa_ref : ESSENTIAL_BWA_REF, 5 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/recalibration.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/DNAseq/snpEff.header: -------------------------------------------------------------------------------- 1 | snpEff_vars=[ 2 | outdir : RESULTS + "/snpEff", 3 | java_flags : "-Xmx8g", 4 | config : "", // specify if not default 5 | genome_version : ESSENTIAL_SNPEFF_GENOME, // the corresponding snpEff database 6 | output_format : "gatk", // either of vcf, gatk, bed, bedAnn 7 | extra : "" 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/DNAseq/snpEff.groovy" 11 | 12 | 13 | -------------------------------------------------------------------------------- /modules/DNAseq/validateVariants.groovy: -------------------------------------------------------------------------------- 1 | ValidateVariants = { 2 | doc title: "GATK ValidateVariants", 3 | desc: "This tool validates the adherence of a file to VCF format including information contained within the fields REF, CHR_COUNTS, IDS, ALLELES.", 4 | constraints: "", 5 | bpipe_version: "tested with 0.9.9.8.slurm", 6 | author: "Frank Rühle" 7 | 8 | output.dir = ValidateVariants_vars.outdir 9 | 10 | def ValidateVariants_vars_FLAGS = 11 | (ValidateVariants_vars.bwa_ref ? " -R " + ValidateVariants_vars.bwa_ref : "" ) + 12 | (ValidateVariants_vars.known_variants ? " --dbsnp " + ValidateVariants_vars.known_variants : "" ) + 13 | (ValidateVariants_vars.extra ? 
" " + ValidateVariants_vars.extra : "" ) 14 | 15 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 16 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix.prefix).getName()) 18 | 19 | transform (".vcf.gz") to (".report") { 20 | 21 | exec """ 22 | ${TOOL_ENV} && 23 | ${PREAMBLE} && 24 | 25 | gatk --java-options "${ValidateVariants_vars.java_flags}" ValidateVariants $ValidateVariants_vars_FLAGS -V $input > $output 26 | ""","ValidateVariants" 27 | } 28 | forward input 29 | } 30 | 31 | -------------------------------------------------------------------------------- /modules/DNAseq/validateVariants.header: -------------------------------------------------------------------------------- 1 | ValidateVariants_vars=[ 2 | outdir : QC + "/GATK_ValidateVariants", 3 | java_flags : "-Xmx20000m", 4 | bwa_ref : ESSENTIAL_BWA_REF, 5 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/validateVariants.groovy" 10 | 11 | 12 | -------------------------------------------------------------------------------- /modules/DNAseq/variantFiltration.header: -------------------------------------------------------------------------------- 1 | VariantFiltration_vars=[ 2 | outdir : RESULTS + "/VariantFiltration", 3 | bwa_ref : ESSENTIAL_BWA_REF, 4 | java_flags : "-Xmx8g", 5 | 6 | // Note that these expressions will tag any sites as failing the filter where the value matches the expression. 7 | // For explanations of filter criteria see https://gatk.broadinstitute.org/hc/en-us/articles/360035890471 8 | snp_filter_QD : "QD < 2.0", 9 | snp_filter_QUAL : "", 10 | snp_filter_SOR : "SOR > 3.0", 11 | snp_filter_FS : "FS > 60.0", 12 | snp_filter_MQ : "MQ < 40.0", 13 | snp_filter_MQRankSum : "MQRankSum < -12.5", 14 | snp_filter_ReadPosRankSum : "ReadPosRankSum < -8.0", 15 | 16 | indel_filter_QD : "QD < 2.0", 17 | indel_filter_QUAL : "", 18 | indel_filter_SOR : "SOR > 10.0", 19 | indel_filter_FS : "FS > 200.0", 20 | indel_filter_MQ : "", 21 | indel_filter_MQRankSum : "", 22 | indel_filter_ReadPosRankSum : "ReadPosRankSum < -20.0" 23 | ] 24 | 25 | load PIPELINE_ROOT + "/modules/DNAseq/variantFiltration.groovy" 26 | 27 | -------------------------------------------------------------------------------- /modules/DNAseq/variant_score_recalibration.header: -------------------------------------------------------------------------------- 1 | VariantScoreRecalibration_vars=[ 2 | outdir : RESULTS + "/VQSR", 3 | java_flags : "-Xmx24g -Xms24g", 4 | known_variants : ESSENTIAL_KNOWN_VARIANTS, 5 | hapmap_variants : ESSENTIAL_HAPMAP_VARIANTS, 6 | omni_variants : ESSENTIAL_OMNI_VARIANTS, 7 | mills_variants : ESSENTIAL_MILLS_VARIANTS, 8 | thousand_genomes_variants: ESSENTIAL_THOUSAND_GENOMES_VARIANTS, 9 | snp_filter_level : "99.7", 10 | indel_filter_level : "99.7", 11 | max_gaussians_indels : 4, 12 | max_gaussians_snps : 6, 13 | bwa_ref : ESSENTIAL_BWA_REF 14 | ] 15 | 16 | load PIPELINE_ROOT + "/modules/DNAseq/variant_score_recalibration.groovy" 17 | 18 | -------------------------------------------------------------------------------- /modules/DNAseq/variantcallHC.groovy: -------------------------------------------------------------------------------- 1 | VariantCallHC = { 2 | doc title: "GATK Variant Calling HC", 3 | desc: "Call variants in BAM files using GATK HaplotypeCaller", 4 | constraints: "Requires BWA ( 
parameter -M ) produced BAM file, with correct chromosome order and ReadGroup attached.", 5 | bpipe_version: "tested with bpipe 0.9.9.8.slurm", 6 | author: "Oliver Drechsel, modified by Frank Rühle" 7 | 8 | output.dir = VariantCallHC_vars.outdir 9 | 10 | def HaplotypeCaller_FLAGS = 11 | (VariantCallHC_vars.erc ? " -ERC " + VariantCallHC_vars.erc : "" ) + 12 | (VariantCallHC_vars.call_region ? " -L " + VariantCallHC_vars.call_region : "" ) + 13 | (VariantCallHC_vars.bwa_ref ? " -R " + VariantCallHC_vars.bwa_ref : "" ) + 14 | (VariantCallHC_vars.known_variants ? " --dbsnp " + VariantCallHC_vars.known_variants : "" ) + 15 | (VariantCallHC_vars.extra ? " " + VariantCallHC_vars.extra : "" ) 16 | 17 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 18 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | transform (".bam") to (".g.vcf.gz") { 22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | gatk --java-options "${VariantCallHC_vars.java_flags}" HaplotypeCaller $HaplotypeCaller_FLAGS -I $input -O $output 27 | 28 | ""","VariantCallHC" 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /modules/DNAseq/variantcallHC.header: -------------------------------------------------------------------------------- 1 | VariantCallHC_vars=[ 2 | outdir : RESULTS + "/VariantCallHC", 3 | java_flags : "-Xmx20000m", 4 | erc : "GVCF", // Mode for emitting reference confidence scores (NONE, "BP_RESOLUTION" or "GVCF") 5 | call_region : ESSENTIAL_CALL_REGION, 6 | bwa_ref : ESSENTIAL_BWA_REF, 7 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 8 | extra : "" 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/DNAseq/variantcallHC.groovy" 12 | 13 | -------------------------------------------------------------------------------- /modules/DNAseq/variantcallUG.groovy: -------------------------------------------------------------------------------- 1 | VariantCallUG = { 2 | doc title: "GATK Variant Calling UG", 3 | desc: "Call variants in BAM files using GATK UnifiedGenotyper", 4 | constraints: "Requires BWA ( parameter -M ) produced BAM file, with correct chromosome order and ReadGroup attached.", 5 | bpipe_version: "tested with bpipe 0.9.9.3.slurm", 6 | author: "Oliver Drechsel" 7 | 8 | output.dir = VariantCallUG_vars.outdir 9 | 10 | def UnifiedGenotyper_FLAGS = 11 | " -glm BOTH " + 12 | (VariantCallUG_vars.call_region ? " -L " + VariantCallUG_vars.call_region : "" ) + 13 | (VariantCallUG_vars.threads ? " -nt " + VariantCallUG_vars.threads : "" ) + // this is not a typo! 14 | (VariantCallUG_vars.threads ? " -nct " + VariantCallUG_vars.threads : "" ) + // check tool multithread options 15 | (VariantCallUG_vars.bwa_ref ? 
" -R " + VariantCallUG_vars.bwa_ref : "" ) 16 | 17 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 18 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | transform (".dupmarked.realigned.recalibrated.bam") to (".UG.vcf.gz") { 22 | // usage parameters https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_genotyper_UnifiedGenotyper.php 23 | exec """ 24 | ${TOOL_ENV} && 25 | ${PREAMBLE} && 26 | 27 | java ${VariantCallUG_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T UnifiedGenotyper $UnifiedGenotyper_FLAGS -I $input -o $output 28 | ""","VariantCallUG" 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /modules/DNAseq/variantcallUG.header: -------------------------------------------------------------------------------- 1 | VariantCallUG_vars=[ 2 | outdir : RESULTS, 3 | java_flags : "-Xmx20000m", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | call_region : ESSENTIAL_CALL_REGION, 6 | bwa_ref : ESSENTIAL_BWA_REF 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/variantcallUG.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/DNAseq/varianteval.groovy: -------------------------------------------------------------------------------- 1 | VariantEval = { 2 | doc title: "GATK VariantEval", 3 | desc: "Variant evaluation (% in dbSNP, genotype concordance, Ti/Tv ratios, and a lot more)", 4 | constraints: "VariantEval is a BETA tool and is not yet ready for use in production", 5 | bpipe_version: "tested with 0.9.9.8.slurm", 6 | author: "Oliver Drechsel, modified by Frank Rühle" 7 | 8 | output.dir = VariantEval_vars.outdir 9 | 10 | def VariantEval_FLAGS = 11 | (VariantEval_vars.bwa_ref ? " -R " + VariantEval_vars.bwa_ref : "" ) + 12 | (VariantEval_vars.known_variants ? " --dbsnp " + VariantEval_vars.known_variants : "" ) + 13 | (VariantEval_vars.extra ? 
" " + VariantEval_vars.extra : "" ) 14 | 15 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 16 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix.prefix).getName()) 18 | 19 | transform (".vcf.gz") to (".report") { 20 | 21 | exec """ 22 | ${TOOL_ENV} && 23 | ${PREAMBLE} && 24 | 25 | gatk --java-options "${VariantEval_vars.java_flags}" VariantEval $VariantEval_FLAGS --eval $input -O $output 26 | ""","VariantEval" 27 | } 28 | forward input 29 | } 30 | 31 | -------------------------------------------------------------------------------- /modules/DNAseq/varianteval.header: -------------------------------------------------------------------------------- 1 | VariantEval_vars=[ 2 | outdir : QC + "/GATK_varianteval", 3 | java_flags : "-Xmx20000m", 4 | bwa_ref : ESSENTIAL_BWA_REF, 5 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/DNAseq/varianteval.groovy" 10 | 11 | // usage parameters https://gatk.broadinstitute.org/hc/en-us/articles/4418051376155-VariantEval-BETA- 12 | -------------------------------------------------------------------------------- /modules/DNAseq/variantfuseHC.header: -------------------------------------------------------------------------------- 1 | HaplotypeCaller_vars=[ 2 | outdir : RESULTS, 3 | java_flags : "-Xmx20000m", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | bwa_ref : ESSENTIAL_BWA_REF, 6 | known_variants: ESSENTIAL_KNOWN_VARIANTS, 7 | refconf : "--emitRefConfidence GVCF", 8 | indextype : "--variant_index_type LINEAR", 9 | indexparm : "-variant_index_parameter 128000", 10 | extra : "" 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/DNAseq/variantfuseHC.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/NGS/bam2bw.groovy: -------------------------------------------------------------------------------- 1 | bam2bw = { 2 | doc title: "bam2bw", 3 | desc: "Convert BAM file to bigWig", 4 | constraints: "none.", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir=bam2bw_vars.outdir 9 | 10 | def TOOL_ENV = prepare_tool_env("bedtools", tools["bedtools"]["version"], tools["bedtools"]["runenv"]) + " && " + 11 | prepare_tool_env("kentutils", tools["kentutils"]["version"], tools["kentutils"]["runenv"]) + " && " + 12 | prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | transform(".bam") to ("_scaled.bw") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | BASEOUTPUT=`basename $output` && 21 | CHRSIZES=\${TMP}/\$(basename ${input.prefix}).bam2bw.chrsizes && 22 | samtools idxstats ${input} | cut -f1-2 > \${CHRSIZES} && 23 | TOTAL_MAPPED=\$( samtools flagstat $input | head -n5 | tail -n1 | cut -f1 -d" ") && 24 | SCALE=\$(echo "1000000/\$TOTAL_MAPPED" | bc -l) && 25 | genomeCoverageBed -bg -split -scale \${SCALE} -ibam ${input} | sortBed -i - > \${TMP}/\${BASEOUTPUT%.bw}.bedgraph && 26 | bedGraphToBigWig \${TMP}/\${BASEOUTPUT%.bw}.bedgraph \${CHRSIZES} \${TMP}/\${BASEOUTPUT} && 27 | cp -f \${TMP}/\${BASEOUTPUT} $output 28 | ""","bam2bw" 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /modules/NGS/bam2bw.header: 
-------------------------------------------------------------------------------- 1 | bam2bw_vars=[ 2 | outdir: TRACKS 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/NGS/bam2bw.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/NGS/bamcoverage.groovy: -------------------------------------------------------------------------------- 1 | bamCoverage = { 2 | doc title: "bamCoverage", 3 | desc: "bamCoverage wrapper", 4 | constraints: "normalised bigWig track for RNA-seq/ChIP-seq PE data", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | var subdir : "" 9 | output.dir = bamCoverage_vars.outdir + "/$subdir" 10 | 11 | def BAMCOVERAGE_FLAGS = 12 | (bamCoverage_vars.cores ? " --numberOfProcessors " + bamCoverage_vars.cores : "") + 13 | (bamCoverage_vars.fragments ? " --extendReads " + (bamCoverage_vars.paired ? "" : bamCoverage_vars.fraglength + " ") : "") + 14 | (bamCoverage_vars.extra ? " " + bamCoverage_vars.extra : "") 15 | 16 | def TOOL_ENV = prepare_tool_env("deeptools", tools["deeptools"]["version"], tools["deeptools"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 18 | 19 | transform(".bam") to(".bw") { 20 | exec """ 21 | ${TOOL_ENV} && 22 | ${PREAMBLE} && 23 | 24 | bamCoverage $BAMCOVERAGE_FLAGS --bam $input -o ${output}; 25 | ""","bamCoverage" 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /modules/NGS/bamcoverage.header: -------------------------------------------------------------------------------- 1 | bamCoverage_vars=[ 2 | outdir : TRACKS, 3 | cores : Integer.toString(ESSENTIAL_THREADS), 4 | paired : RUN_IN_PAIRED_END_MODE, // run in se or pe mode 5 | fragments : (binding.hasVariable('ESSENTIAL_FRAGMENT_USAGE') && binding.hasVariable('ESSENTIAL_FRAGLEN') ? ESSENTIAL_FRAGMENT_USAGE == "yes" : false), 6 | fraglength: (binding.hasVariable('ESSENTIAL_FRAGLEN') ? Integer.toString(ESSENTIAL_FRAGLEN) : "200"), 7 | // If you want to exclude chromosomes for normalisation e.g. rDNA or mitochondrion add 8 | // the following parameter --ignoreForNormalization \"chrM, rDNA\".
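// Illustrative example only (the value is project-specific, typically set in essential.vars.groovy;
// it is not prescribed by this module):
//   ESSENTIAL_BAMCOVERAGE = "--normalizeUsing CPM --binSize 25 --ignoreForNormalization chrX chrM"
// --normalizeUsing, --binSize and --ignoreForNormalization are documented deepTools bamCoverage options.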
9 | // If you like to use offsets, blacklist regions, center reads or anything like it please 10 | // refer to the deepTools manual, there is even a special modus for Nucleosome detection in Mnase data 11 | // for deeptools versions >v3 you have to use --normalizeUsing RPKM since the API changed 12 | extra : "--outFileFormat bigwig" + " " + ESSENTIAL_BAMCOVERAGE 13 | ] 14 | 15 | load PIPELINE_ROOT + "/modules/NGS/bamcoverage.groovy" 16 | 17 | // expected parameter types 18 | class bamCoverage_vars_schema { 19 | String outdir 20 | String cores 21 | Boolean paired 22 | Boolean fragments 23 | String fraglength 24 | String extra 25 | 26 | // check for the presence of mandatory params 27 | boolean asBoolean() { 28 | outdir 29 | } 30 | } 31 | 32 | validate_schema(bamCoverage_vars_schema, bamCoverage_vars) 33 | -------------------------------------------------------------------------------- /modules/NGS/bamindexer.groovy: -------------------------------------------------------------------------------- 1 | BAMindexer = { 2 | doc title: "BAMindexer", 3 | desc: "Call samtools to index a bam file", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols, Nastasja Kreim" 7 | 8 | def File f = new File(input1) 9 | output.dir = f.getParent() 10 | 11 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 12 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 13 | 14 | transform(".bam\$") to(".bam.bai") { 15 | exec """ 16 | ${TOOL_ENV} && 17 | ${PREAMBLE} && 18 | 19 | samtools index $input 20 | ""","BAMindexer" 21 | } 22 | 23 | forward input 24 | } 25 | 26 | -------------------------------------------------------------------------------- /modules/NGS/bamindexer.header: -------------------------------------------------------------------------------- 1 | BAMindexer_vars=[ 2 | ] 3 | 4 | load PIPELINE_ROOT + "/modules/NGS/bamindexer.groovy" 5 | 6 | -------------------------------------------------------------------------------- /modules/NGS/bamqc.groovy: -------------------------------------------------------------------------------- 1 | BamQC = { 2 | doc title: "BamQC", 3 | desc: "Quality control of bam file", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Giuseppe Petrosino" 7 | 8 | output.dir = BamQC_vars.outdir 9 | def BAMQC_FLAGS = 10 | (BamQC_vars.extra ? 
" " + BamQC_vars.extra : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("bamqc", tools["bamqc"]["version"], tools["bamqc"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | transform(".bam") to ("_bamqc.zip") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | bamqc $BAMQC_FLAGS -o $output.dir $input 21 | ""","BamQC" 22 | } 23 | 24 | forward input 25 | } 26 | -------------------------------------------------------------------------------- /modules/NGS/bamqc.header: -------------------------------------------------------------------------------- 1 | BamQC_vars=[ 2 | outdir: QC + "/bamqc", 3 | extra : "--extract --quiet" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/bamqc.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/downsamplebam.groovy: -------------------------------------------------------------------------------- 1 | DownsampleBAM = { 2 | doc title: "DownsampleBAM", 3 | desc: "Call samtools tools to downsample a given bam file to roughly a given number of mapped reads", 4 | constraints: "Samtools tools version >= 1.3", 5 | bpipe_version: "tested with bpipe 0.9.9.5", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = DownsampleBAM_vars.outdir 9 | 10 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".bam") to (".down.bam") { 14 | exec """ 15 | ${TOOL_ENV} && 16 | ${PREAMBLE} && 17 | 18 | BASE=\$(basename $input) && 19 | samtools view -F 0x04 -bh $input -o \${TMP}/\${BASE}_mapped.bam && 20 | TOTAL_MAPPED=\$(samtools flagstat \${TMP}/\${BASE}_mapped.bam | grep mapped | head -n 1 | awk '{print \$1 }') && 21 | echo mapped_info \$TOTAL_MAPPED && 22 | if [[ ${DownsampleBAM_vars.amount} > \$TOTAL_MAPPED ]]; then 23 | echo "Downsample amount higher than amount of mapped reads. Keeping all reads!" 
&& 24 | cp \${TMP}/\${BASE}_mapped.bam $output; 25 | else 26 | PROBABILITY=\$(echo "${DownsampleBAM_vars.seed} + ${DownsampleBAM_vars.amount}/\$TOTAL_MAPPED" | bc -l); 27 | echo Probability \$PROBABILITY && 28 | samtools view -bs \$PROBABILITY -o $output \${TMP}/\${BASE}_mapped.bam; 29 | fi 30 | ""","DownsampleBAM" 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /modules/NGS/downsamplebam.header: -------------------------------------------------------------------------------- 1 | DownsampleBAM_vars=[ 2 | outdir: MAPPED, 3 | amount: 100000, 4 | seed : 1 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/NGS/downsamplebam.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/NGS/downsamplefastqPE.groovy: -------------------------------------------------------------------------------- 1 | DownsamplefastqPE = { 2 | doc title: "downsample", 3 | desc: "downsample wrapper for fastq files (paired end)", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = DownsamplefastqPE_vars.outdir 9 | def OUTPUTFILES = new ArrayList() 10 | inputs.eachWithIndex { item, index -> 11 | File f = new File(item) 12 | OUTPUTFILES.add((f.getName() =~ /.fastq.gz/).replaceFirst(".down.fastq.gz")) 13 | println OUTPUTFILES[index] 14 | } 15 | 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | produce(OUTPUTFILES){ 19 | exec """ 20 | paste <(zcat $input1) <(zcat $input2) | awk '{ printf("%s",\$0); n++; if(n%4==0) { printf("\\n");} else { printf("\\t\\t");} }' | shuf | head -n ${DownsamplefastqPE_vars.amount} | sed 's/\\t\\t/\\n/g' | awk -v r1=$output1.prefix -v r2=$output2.prefix 'BEGIN {FS="\\t"}{print \$1 >r1; print \$2>r2 }' && 21 | gzip $output1.prefix && 22 | gzip $output2.prefix; 23 | ""","Downsamplefastq" 24 | } 25 | } 26 | 27 | -------------------------------------------------------------------------------- /modules/NGS/downsamplefastqPE.header: -------------------------------------------------------------------------------- 1 | DownsamplefastqPE_vars=[ 2 | outdir: PROJECT + "/downsampled", 3 | amount: 2500000 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/downsamplefastqPE.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/downsamplefastqSE.groovy: -------------------------------------------------------------------------------- 1 | DownsamplefastqSE = { 2 | doc title: "downsample", 3 | desc: "downsample wrapper for fastq files (single end)", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = DownsamplefastqSE_vars.outdir 9 | def OUTPUTFILES = new ArrayList() 10 | inputs.eachWithIndex { item, index -> 11 | File f = new File(item) 12 | OUTPUTFILES.add((f.getName() =~ /.fastq.gz/).replaceFirst(".down.fastq.gz")) 13 | OUTPUTFILES.add(item) 14 | println OUTPUTFILES[index] 15 | } 16 | 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 18 | 19 | produce(OUTPUTFILES) { 20 | exec """ 21 | paste <(zcat $input) | awk '{ printf("%s",\$0); n++; if(n%4==0) { printf("\\n");} else { printf("\\t\\t");} }' | shuf | head -n ${DownsamplefastqSE_vars.amount} | sed 's/\\t\\t/\\n/g' | awk -v r1=$output1.prefix 'BEGIN {FS="\\t"}{print \$1 >r1}' && 22 | gzip $output1.prefix 23 | ""","Downsamplefastq" 24 | } 25 | } 26 | 27 
26 | -------------------------------------------------------------------------------- /modules/NGS/downsamplefastqSE.header: -------------------------------------------------------------------------------- 1 | DownsamplefastqSE_vars=[ 2 | outdir: PROJECT + "/downsampled", 3 | amount: 100000 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/downsamplefastqSE.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/extend.groovy: -------------------------------------------------------------------------------- 1 | extend = { 2 | doc title: "extend", 3 | desc: "Extend read length to the average fragment size", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir=extend_vars.outdir 9 | 10 | def SAMTOOLS_SORT_FLAGS = "-O bam -@ " + extend_vars.samtools_threads 11 | 12 | def TOOL_ENV = prepare_tool_env("bedtools", tools["bedtools"]["version"], tools["bedtools"]["runenv"]) + " && " + 13 | prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 14 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 15 | 16 | transform(".bam") to ("_ext.bam") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | CHRSIZES="\${TMP}/\$(basename ${input.prefix}).extend.chrsizes" && 22 | samtools idxstats ${input} | cut -f1-2 > "\${CHRSIZES}" && 23 | bedtools bamtobed -split -i $input | bedtools slop -g "\${CHRSIZES}" -l 0 -r ${extend_vars.fraglen} -s | bedtools bedtobam -ubam -g "\${CHRSIZES}" | samtools sort $SAMTOOLS_SORT_FLAGS -T \${TMP}/\$(basename $output.prefix) - > $output && 24 | samtools index $output 25 | ""","extend" 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /modules/NGS/extend.header: -------------------------------------------------------------------------------- 1 | extend_vars=[ 2 | outdir : MAPPED, 3 | fraglen : ESSENTIAL_FRAGLEN - ESSENTIAL_READLEN, //extension length: the average fragment length minus the read length 4 | samtools_threads: Integer.toString(ESSENTIAL_THREADS) 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/NGS/extend.groovy" 8 | 9 | // expected parameter types 10 | class extend_vars_schema { 11 | String outdir 12 | Integer fraglen 13 | String samtools_threads 14 | 15 | // check for the presence of mandatory params 16 | boolean asBoolean() { 17 | outdir && fraglen > 0 18 | } 19 | } 20 | 21 | validate_schema(extend_vars_schema, extend_vars)
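This header shows the schema/validation pattern used throughout the repo: the `*_vars_schema` class declares the expected type of every key, and `asBoolean()` encodes which parameters are mandatory, so a bad setting fails when the pipeline is loaded rather than hours into a run. A small sketch of what it catches (`validate_schema` itself lives in config/validate_module_params.groovy; the bad map below is hypothetical):

    // a String where the schema declares an Integer fraglen:
    def bad_vars = [outdir: MAPPED, fraglen: "180bp", samtools_threads: "4"]
    validate_schema(extend_vars_schema, bad_vars)   // fails at load time with a type mismatch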
" " + FastQC_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("fastqc", tools["fastqc"]["version"], tools["fastqc"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | transform("*.fastq.gz") to ("_fastqc.zip") { 18 | exec """ 19 | ${TOOL_ENV} && 20 | ${PREAMBLE} && 21 | 22 | fastqc --extract $FASTQC_FLAGS -o $output.dir $inputs 23 | ""","FastQC" 24 | } 25 | 26 | forward inputs 27 | } 28 | -------------------------------------------------------------------------------- /modules/NGS/fastqc.header: -------------------------------------------------------------------------------- 1 | FastQC_vars=[ 2 | outdir: QC + "/fastqc", 3 | extra : "--quiet" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/fastqc.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/fastqscreen.groovy: -------------------------------------------------------------------------------- 1 | FastqScreen = { 2 | doc title: "FastqScreen", 3 | desc: "Quality control of input file against various contaminants", 4 | constraints: "Only supports compressed FASTQ files", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim, modified by Frank Ruehle" 7 | 8 | var subdir : "" 9 | output.dir = FastqScreen_vars.outdir + "/$subdir" 10 | def FASTQSCREEN_FLAGS = 11 | (FastqScreen_vars.threads ? " --threads " + FastqScreen_vars.threads : "") + 12 | (FastqScreen_vars.extra ? " " + FastqScreen_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("fastqscreen", tools["fastqscreen"]["version"], tools["fastqscreen"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | transform("*.fastq.gz") to("_fastqscreen.done") { 18 | exec """ 19 | ${TOOL_ENV} && 20 | ${PREAMBLE} && 21 | 22 | if [ ! -e "$output.prefix" ]; then 23 | mkdir $output.prefix; 24 | fi && 25 | fastqreference=${FastqScreen_vars.conf}; 26 | references=(\${fastqreference//,/ }); 27 | for i in "\${!references[@]}"; do 28 | reference=(\${references[i]//::/ }); 29 | echo -e "DATABASE\t\${reference[0]}\t\${reference[1]}" >> $output.prefix/fastqscreen.conf; 30 | done; 31 | fastq_screen $FASTQSCREEN_FLAGS --conf $output.prefix/fastqscreen.conf --outdir $output.prefix $inputs; 32 | touch $outputs 33 | ""","FastqScreen" 34 | } 35 | 36 | forward inputs 37 | } 38 | 39 | -------------------------------------------------------------------------------- /modules/NGS/fastqscreen.header: -------------------------------------------------------------------------------- 1 | FastqScreen_vars=[ 2 | outdir : QC + "/fastqscreen", 3 | threads: Integer.toString(ESSENTIAL_THREADS), 4 | //the fastqscreen_conf defines your references, with these we will create a fastqscreen conf script and then run the fastqscreen 5 | //this could be e.g. 6 | conf : ESSENTIAL_FASTQSCREEN, 7 | //fastqscreen additional param e.g. subset or bowtie /bowtie 2 parameters 8 | extra : "--nohits --subset 100000" 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/NGS/fastqscreen.groovy" 12 | 13 | -------------------------------------------------------------------------------- /modules/NGS/filterchromosomes.groovy: -------------------------------------------------------------------------------- 1 | FilterChr = { 2 | doc title: "FilterChr", 3 | desc: "When mapping to full genome, including unassembled contigs, remove those extra contiguous before proceeding for further analysis. 
8 | -------------------------------------------------------------------------------- /modules/NGS/fastqscreen.groovy: -------------------------------------------------------------------------------- 1 | FastqScreen = { 2 | doc title: "FastqScreen", 3 | desc: "Quality control of input file against various contaminants", 4 | constraints: "Only supports compressed FASTQ files", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim, modified by Frank Ruehle" 7 | 8 | var subdir : "" 9 | output.dir = FastqScreen_vars.outdir + "/$subdir" 10 | def FASTQSCREEN_FLAGS = 11 | (FastqScreen_vars.threads ? " --threads " + FastqScreen_vars.threads : "") + 12 | (FastqScreen_vars.extra ? " " + FastqScreen_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("fastqscreen", tools["fastqscreen"]["version"], tools["fastqscreen"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | transform("*.fastq.gz") to("_fastqscreen.done") { 18 | exec """ 19 | ${TOOL_ENV} && 20 | ${PREAMBLE} && 21 | 22 | if [ ! -e "$output.prefix" ]; then 23 | mkdir $output.prefix; 24 | fi && 25 | fastqreference=${FastqScreen_vars.conf}; 26 | references=(\${fastqreference//,/ }); 27 | for i in "\${!references[@]}"; do 28 | reference=(\${references[i]//::/ }); 29 | echo -e "DATABASE\t\${reference[0]}\t\${reference[1]}" >> $output.prefix/fastqscreen.conf; 30 | done; 31 | fastq_screen $FASTQSCREEN_FLAGS --conf $output.prefix/fastqscreen.conf --outdir $output.prefix $inputs; 32 | touch $outputs 33 | ""","FastqScreen" 34 | } 35 | 36 | forward inputs 37 | } 38 | 39 | -------------------------------------------------------------------------------- /modules/NGS/fastqscreen.header: -------------------------------------------------------------------------------- 1 | FastqScreen_vars=[ 2 | outdir : QC + "/fastqscreen", 3 | threads: Integer.toString(ESSENTIAL_THREADS), 4 | //conf defines your references: from this string the module writes a fastq_screen config file and then runs fastq_screen 5 | //(see the sketch after this module for an example value) 6 | conf : ESSENTIAL_FASTQSCREEN, 7 | //additional fastq_screen parameters, e.g. the subset size or bowtie/bowtie2 options 8 | extra : "--nohits --subset 100000" 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/NGS/fastqscreen.groovy" 12 |
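To make the loop above concrete: `conf` is a comma-separated list of `name::index` pairs, and each pair becomes one DATABASE line of the generated fastq_screen config. A hypothetical value and the file it produces:

    ESSENTIAL_FASTQSCREEN="human::/refs/bowtie2/GRCh38,mouse::/refs/bowtie2/GRCm38"  // paths are made up

    // resulting $output.prefix/fastqscreen.conf:
    // DATABASE	human	/refs/bowtie2/GRCh38
    // DATABASE	mouse	/refs/bowtie2/GRCm38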
13 | -------------------------------------------------------------------------------- /modules/NGS/filterchromosomes.groovy: -------------------------------------------------------------------------------- 1 | FilterChr = { 2 | doc title: "FilterChr", 3 | desc: "When mapping to the full genome, including unassembled contigs, remove those extra contigs before further analysis. The goal is to increase speed and decrease disk space usage. Source: https://www.biostars.org/p/171791/#171819", 4 | constraints: "Requires a file with the list of chromosomes to keep.", 5 | bpipe_version: "tested with bpipe 0.9.9.3.slurm", 6 | author: "António Domingues" 7 | 8 | output.dir=FilterChr_vars.outdir 9 | 10 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".bam") to (".chrOnly.bam") { 14 | exec """ 15 | ${TOOL_ENV} && 16 | ${PREAMBLE} && 17 | 18 | chroms=\$(cut -f1 ${FilterChr_vars.file}) && 19 | samtools view -@ ${FilterChr_vars.threads} -b $input \$chroms > $output && 20 | samtools index $output 21 | ""","FilterChr" 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /modules/NGS/filterchromosomes.header: -------------------------------------------------------------------------------- 1 | FilterChr_vars=[ 2 | outdir : MAPPED, 3 | threads: Integer.toString(ESSENTIAL_THREADS), 4 | file : ESSENTIAL_PROJECT + "/chromosomes2keep.txt" 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/NGS/filterchromosomes.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/NGS/insertsize.groovy: -------------------------------------------------------------------------------- 1 | InsertSize = { 2 | doc title: "InsertSize", 3 | desc: "Call picard tools to collect insert size metrics", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | var subdir : "" 9 | output.dir = InsertSize_vars.outdir + "/$subdir" 10 | 11 | def INSERTSIZE_FLAGS = 12 | (InsertSize_vars.extra ? " " + InsertSize_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) + " && " + 15 | prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && "+ 16 | prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 18 | 19 | transform(".bam") to ("_insertsizemetrics.tsv") { 20 | exec """ 21 | ${TOOL_ENV} && 22 | ${PREAMBLE} && 23 | 24 | java ${InsertSize_vars.java_flags} -jar \${PICARD} CollectInsertSizeMetrics $INSERTSIZE_FLAGS INPUT=$input OUTPUT=$output HISTOGRAM_FILE=${output.prefix}_hist.pdf 25 | ""","InsertSize" 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /modules/NGS/insertsize.header: -------------------------------------------------------------------------------- 1 | InsertSize_vars=[ 2 | outdir : QC + "/insertsize", //location of the OUTPUT Dir 3 | java_flags: "-Xmx5000m", //set the java heap size 4 | extra : "ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT" //sometimes the sorted flag is not set, and we should tolerate reads that overhang chromosome ends 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/NGS/insertsize.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/NGS/markdups.groovy: -------------------------------------------------------------------------------- 1 | MarkDups = { 2 | doc title: "MarkDups", 3 | desc: "Call picard tools to mark, and optionally remove, duplicated reads in a bam file", 4 | constraints: "Picard tools version >= 1.141. Expects an env var called `picard` with the path to picard's jar", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir = MarkDups_vars.outdir 9 | def MARKDUPS_FLAGS = 10 | " REMOVE_DUPLICATES=" + (MarkDups_vars.remove_dups ? "TRUE" : "FALSE") + 11 | " ASSUME_SORTED=" + (MarkDups_vars.assume_sorted ? "TRUE" : "FALSE") + 12 | (MarkDups_vars.extra ? " " + MarkDups_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 15 | prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | transform(".bam") to (".dupmarked.bam") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | java ${MarkDups_vars.java_flags} -jar \${PICARD} MarkDuplicates $MARKDUPS_FLAGS INPUT=$input OUTPUT=$output METRICS_FILE=${input.prefix}_dupmarked_dupmetrics.tsv 24 | ""","MarkDups" 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- /modules/NGS/markdups.header: -------------------------------------------------------------------------------- 1 | MarkDups_vars=[ 2 | outdir : MAPPED, 3 | java_flags : "-Xmx5000m", 4 | remove_dups : false, 5 | assume_sorted: true, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/NGS/markdups.groovy" 10 | 11 | // expected parameter types 12 | class MarkDups_vars_schema { 13 | String outdir 14 | String java_flags 15 | Boolean remove_dups 16 | Boolean assume_sorted 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir 22 | } 23 | } 24 | 25 | validate_schema(MarkDups_vars_schema, MarkDups_vars)
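With the defaults above, `MARKDUPS_FLAGS` expands to ` REMOVE_DUPLICATES=FALSE ASSUME_SORTED=TRUE`, i.e. duplicates are flagged in the output BAM but kept. A sketch of a per-project override, placed in the pipeline file after loading this header:

    load PIPELINE_ROOT + "/modules/NGS/markdups.header"
    MarkDups_vars.remove_dups = true   // now expands to REMOVE_DUPLICATES=TRUE: duplicates are dropped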
26 | -------------------------------------------------------------------------------- /modules/NGS/markdups2.groovy: -------------------------------------------------------------------------------- 1 | MarkDups2 = { 2 | doc title: "MarkDups2", 3 | desc: "Call the bamUtil dedup tool to mark, and optionally remove, duplicated reads in a bam file", 4 | constraints: "bamUtil tool version >= 1.0.13", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Giuseppe Petrosino" 7 | 8 | output.dir=MarkDups2_vars.outdir 9 | 10 | def TOOL_ENV = prepare_tool_env("bamutil", tools["bamutil"]["version"], tools["bamutil"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".bam") to (".dupmarked.bam") { 14 | exec """ 15 | ${TOOL_ENV} && 16 | ${PREAMBLE} && 17 | 18 | bam dedup --in $input --out $output --log ${input.prefix}_dupmetrics.log --noPhoneHome 19 | ""","MarkDups2" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /modules/NGS/markdups2.header: -------------------------------------------------------------------------------- 1 | MarkDups2_vars=[ 2 | outdir: MAPPED 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/NGS/markdups2.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/NGS/mergebam.groovy: -------------------------------------------------------------------------------- 1 | MergeBam = { 2 | doc title: "Merge bam files", 3 | desc: "Merges bam files following a pipeline-defined pattern", 4 | constraints: "Unless modified, the name for the merged replicates is determined by removing the pattern _rep[1-9] from the name of the first input. Change the code below if your samples follow a different naming pattern.", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Antonio Domingues" 7 | 8 | output.dir = MergeBam_vars.outdir 9 | def EXP = input1.split("/")[-1].replaceAll(".bam", "").replaceAll("_rep\\d+", "") 10 | 11 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 12 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 13 | 14 | // run the chunk 15 | produce(EXP + ".merged.bam") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | echo $inputs && 21 | samtools merge $output $inputs 22 | ""","MergeBam" 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /modules/NGS/mergebam.header: -------------------------------------------------------------------------------- 1 | MergeBam_vars=[ 2 | outdir: MAPPED + "/merged" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/NGS/mergebam.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/NGS/multiqc.groovy: -------------------------------------------------------------------------------- 1 | MultiQC = { 2 | doc title: "MultiQC", 3 | desc: "MultiQC is a reporting tool that parses summary statistics from results and log files generated by other bioinformatics tools", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.7", 6 | author: "Giuseppe Petrosino" 7 | 8 | output.dir = MultiQC_vars.outdir 9 | def MultiQC_FLAGS = 10 | (MultiQC_vars.extra ? " " + MultiQC_vars.extra : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("multiqc", tools["multiqc"]["version"], tools["multiqc"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | produce("multiqc_report.html") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | multiqc $ESSENTIAL_PROJECT $MultiQC_FLAGS -o $output.dir 21 | ""","MULTIQC" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /modules/NGS/multiqc.header: -------------------------------------------------------------------------------- 1 | MultiQC_vars=[ 2 | outdir: QC + "/multiqc", 3 | extra : "--ignore .bpipe/" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/multiqc.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/rmdups.groovy: -------------------------------------------------------------------------------- 1 | RmDups = { 2 | doc title: "RmDups", 3 | desc: "Call picard tools to mark, and optionally remove, duplicated reads in a bam file", 4 | constraints: "Picard tools version >= 1.141. Expects an env var called `picard` with the path to picard's jar", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir = RmDups_vars.outdir 9 | def RMDUPS_FLAGS = 10 | " REMOVE_DUPLICATES=" + (RmDups_vars.remove_dups ? "TRUE" : "FALSE") + 11 | " ASSUME_SORTED=" + (RmDups_vars.assume_sorted ? "TRUE" : "FALSE") + 12 | (RmDups_vars.extra ?
" " + RmDups_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 15 | prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | transform(".bam") to (".duprm.bam") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | java ${RmDups_vars.java_flags} -jar \${PICARD} MarkDuplicates $RMDUPS_FLAGS INPUT=$input OUTPUT=$output METRICS_FILE=${input.prefix}_duprm_dupmetrics.tsv TMP_DIR=\${TMP} 24 | ""","RmDups" 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- /modules/NGS/rmdups.header: -------------------------------------------------------------------------------- 1 | RmDups_vars=[ 2 | outdir : MAPPED, 3 | java_flags : "-Xmx5000m", 4 | remove_dups : true, 5 | assume_sorted: true, 6 | extra : "" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/NGS/rmdups.groovy" 10 | 11 | // expected parameter types 12 | class RmDups_vars_schema { 13 | String outdir 14 | String java_flags 15 | Boolean remove_dups 16 | Boolean assume_sorted 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir 22 | } 23 | } 24 | 25 | validate_schema(RmDups_vars_schema, RmDups_vars) 26 | -------------------------------------------------------------------------------- /modules/NGS/samtoolscov.groovy: -------------------------------------------------------------------------------- 1 | samtoolscov = { 2 | doc title: "samtoolscov", 3 | desc: "Call samtools to generate coverage statistics", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Frank Rühle" 7 | 8 | output.dir = samtoolscov_vars.outdir 9 | def SAMTOOLSCOV_FLAGS = 10 | (samtoolscov_vars.extra ? 
" " + samtoolscov_vars.extra : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | transform(".bam\$") to(".coverage.txt") { 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | samtools coverage $SAMTOOLSCOV_FLAGS -o $output $input 21 | ""","samtoolscov" 22 | } 23 | 24 | forward input 25 | } 26 | 27 | -------------------------------------------------------------------------------- /modules/NGS/samtoolscov.header: -------------------------------------------------------------------------------- 1 | samtoolscov_vars=[ 2 | outdir: QC + "/samtoolscov", 3 | extra : " " 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/samtoolscov.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/strandSpecificBW.header: -------------------------------------------------------------------------------- 1 | strandBigWig_vars=[ 2 | outdir : TRACKS + "/strandspecific", 3 | threads : Integer.toString(ESSENTIAL_THREADS), 4 | stranded : ESSENTIAL_STRANDED, 5 | binSize : "10", 6 | normalizeUsing : "CPM", 7 | skipNonCoveredRegions: true, 8 | outFileFormat : "bedgraph", 9 | extra : "" 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/NGS/strandSpecificBW.groovy" 13 | 14 | // expected parameter types 15 | class strandBigWig_vars_schema { 16 | String outdir 17 | String threads 18 | String stranded 19 | String binSize 20 | String normalizeUsing 21 | Boolean skipNonCoveredRegions 22 | String outFileFormat 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && stranded 28 | } 29 | } 30 | 31 | validate_schema(strandBigWig_vars_schema, strandBigWig_vars) 32 | -------------------------------------------------------------------------------- /modules/NGS/trackhub.groovy: -------------------------------------------------------------------------------- 1 | trackhub = { 2 | doc title: "Trackhub", 3 | desc: "Generate UCSC track hub to display project tracks", 4 | constraints: "Uses configuration file, which should have been generated earlier", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Martin Oti" 7 | 8 | output.dir = trackhub_vars.tracksdir // location of "trackhub.done" file 9 | def TRACKHUB_FLAGS = 10 | (trackhub_vars.config ? 
"TRACKHUB_CONFIG=" + trackhub_vars.config : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) + " && " + 13 | prepare_tool_env("kentutils", tools["kentutils"]["version"], tools["kentutils"]["runenv"]) 14 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 15 | 16 | transform(".yaml") to (".done") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | Rscript ${PIPELINE_ROOT}/tools/trackhub/Make_Trackhub.R $TRACKHUB_FLAGS 22 | ""","trackhub" 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /modules/NGS/trackhub.header: -------------------------------------------------------------------------------- 1 | trackhub_vars=[ 2 | tracksdir : TRACKS, // folder with tracks & trackhub configuration file 3 | config: TRACKS + "/trackhub.yaml" // trackhub configuration file 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/NGS/trackhub.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/NGS/trackhub_config.header: -------------------------------------------------------------------------------- 1 | trackhub_config_vars=[ 2 | ftpurlbase: "https://hpc1.imb.uni-mainz.de/public",// public FTP base URL 3 | ftpbase : "/fsimb/services/ftp/public/", // public FTP root folder 4 | ucsccfg : "/fsimb/common/tools/ucsc/config/", // folder with UCSC tools configuration files (e.g. narrowPeak.as/broadPeak.as) 5 | targets : PROJECT + "/targets.txt", // targets file describing the samples for ChIP-seq 6 | peaksdir : RESULTS + "/macs2", // location of peak files for ChIP-seq (comment out if no peak files) 7 | tracksdir : TRACKS, // location of track files for putting into trackhub 8 | config : TRACKS + "/trackhub.yaml", // trackhub configuration file 9 | project : PROJECT, 10 | db : ESSENTIAL_DB, // UCSC genome assembly, e.g. "hg19 11 | chromsizes: ESSENTIAL_CHROMSIZES, // chromosome sizes file 12 | stranded : ESSENTIAL_STRANDED, // stranded sequencing or not, for strans-specific bigwig creation 13 | tracks : TRACKS // full path of project subdirectory containing tracks 14 | ] 15 | 16 | load PIPELINE_ROOT + "/modules/NGS/trackhub_config.groovy" 17 | 18 | -------------------------------------------------------------------------------- /modules/RNAseq/GO_Enrichment.groovy: -------------------------------------------------------------------------------- 1 | GO_Enrichment = { 2 | doc title: "GO_Enrichment", 3 | desc: "Gene Ontology enrichment analysis", 4 | constraints: "", 5 | bpipe_version: "", 6 | author: "" 7 | 8 | output.dir = GO_Enrichment_vars.outdir 9 | def GO_Enrichment_FLAGS = 10 | (GO_Enrichment_vars.log2fold ? " log2Fold=" + GO_Enrichment_vars.log2fold : "" ) + 11 | (GO_Enrichment_vars.padj ? " padj=" + GO_Enrichment_vars.padj : "" ) + 12 | (GO_Enrichment_vars.org ? " organism=" + GO_Enrichment_vars.org : "" ) + 13 | (GO_Enrichment_vars.univ ? " univ=" + GO_Enrichment_vars.univ : "" ) + 14 | (GO_Enrichment_vars.type ? " type=" + GO_Enrichment_vars.type : "" ) + 15 | (GO_Enrichment_vars.category ? " plotCategory=" + GO_Enrichment_vars.category : "" ) + 16 | (GO_Enrichment_vars.outdir ? " out=" + GO_Enrichment_vars.outdir : "" ) + 17 | (GO_Enrichment_vars.cores ? " cores=" + GO_Enrichment_vars.cores : "" ) + 18 | (GO_Enrichment_vars.extra ? 
" " + GO_Enrichment_vars.extra : "" ) 19 | 20 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 21 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 22 | 23 | transform(".RData") to("_GO.done") { 24 | exec """ 25 | ${TOOL_ENV} && 26 | ${PREAMBLE} && 27 | 28 | touch $output && 29 | Rscript ${PIPELINE_ROOT}/tools/GO_Enrichment/GO_Enrichment.R rData=$input $GO_Enrichment_FLAGS && 30 | if [ \$? -ne 0 ]; then rm $output; fi; 31 | ""","GO_Enrichment" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /modules/RNAseq/GO_Enrichment.header: -------------------------------------------------------------------------------- 1 | GO_Enrichment_vars=[ 2 | rdata : DE_DESeq2_vars.outdir, 3 | log2fold: Double.toString(Math.log(ESSENTIAL_DESEQ2_FC)/Math.log(2)), 4 | padj : ESSENTIAL_DESEQ2_FDR, 5 | org : ESSENTIAL_ORG, 6 | univ : "expressed", 7 | type : "gene_name", 8 | category: "20", 9 | outdir : RESULTS + "/GO_Analysis", 10 | cores : Integer.toString(ESSENTIAL_THREADS), 11 | extra : "" 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/RNAseq/GO_Enrichment.groovy" 15 | 16 | // expected parameter types 17 | class GO_Enrichment_vars_schema { 18 | String rdata 19 | String log2fold 20 | BigDecimal padj 21 | String org 22 | String univ 23 | String type 24 | String category 25 | String outdir 26 | String cores 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | outdir && padj >= 0 && padj <= 1 32 | } 33 | } 34 | 35 | validate_schema(GO_Enrichment_vars_schema, GO_Enrichment_vars) 36 | -------------------------------------------------------------------------------- /modules/RNAseq/dupradar.header: -------------------------------------------------------------------------------- 1 | dupRadar_vars=[ 2 | outdir : QC + "/dupRadar", //output dir. 
If you change it here, change it in the module file also 3 | stranded: ESSENTIAL_STRANDED, // strandness 4 | paired : ESSENTIAL_PAIRED, // is a paired end experiment 5 | threads : Integer.toString(ESSENTIAL_THREADS), // number of threads to be used 6 | gtf : ESSENTIAL_GENESGTF, // gene model 7 | extra : "", // extra parms sent to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/RNAseq/dupradar.groovy" 11 | 12 | -------------------------------------------------------------------------------- /modules/RNAseq/filter2htseq.groovy: -------------------------------------------------------------------------------- 1 | filter2htseq = { 2 | doc title: "filter_featureCounts_to_htseq", 3 | desc: "filter featureCount output to fit HTSeq format, extract column 1 and 7 as well as skipping the header", 4 | constraints: "none.", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Oliver Drechsel" 7 | 8 | output.dir = filter2htseq_vars.outdir 9 | 10 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 11 | 12 | transform(".raw_readcounts.tsv") to (".readcounts.tsv") { 13 | exec """ 14 | ${PREAMBLE} && 15 | 16 | tail -n +3 $input | awk '{print \$1\"\\t\"\$7}' > $output 17 | """ 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /modules/RNAseq/filter2htseq.header: -------------------------------------------------------------------------------- 1 | filter2htseq_vars=[ 2 | outdir: subread_count_vars.outdir 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/RNAseq/filter2htseq.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/RNAseq/genebodycov.groovy: -------------------------------------------------------------------------------- 1 | geneBodyCov = { 2 | doc title: "geneBodyCoverage", 3 | desc: "Calculate the RNA-seq coverage over gene body. Useful to check the 5' or 3' coverage bias", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Sergi Sayols" 7 | 8 | output.dir = geneBodyCov_vars.outdir 9 | def GENEBODYCOV_FLAGS = 10 | (geneBodyCov_vars.format ? " -f " + geneBodyCov_vars.format : "" ) + 11 | (geneBodyCov_vars.bed ? " -r " + geneBodyCov_vars.bed : "" ) + 12 | (geneBodyCov_vars.extra ? 
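To make the one-liner concrete: `tail -n +3` drops featureCounts' comment line and column header, and the awk keeps columns 1 (Geneid) and 7 (the counts of the single BAM). Hypothetical input and output:

    // *.raw_readcounts.tsv (featureCounts layout):
    //   # Program:featureCounts ... ; Command: ...
    //   Geneid  Chr  Start  End    Strand  Length  sample1.bam
    //   GeneA   1    11869  14409  +       1735    42
    // *.readcounts.tsv (HTSeq-like two-column counts):
    //   GeneA   42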
" " + geneBodyCov_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("rseqc", tools["rseqc"]["version"], tools["rseqc"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | // run the chunk 18 | transform(".bam") to (".geneBodyCoverage.curves.png", ".geneBodyCoverage.r", ".geneBodyCoverage.txt") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | geneBody_coverage.py -i $input -o ${output3.prefix.prefix} $GENEBODYCOV_FLAGS 24 | ""","geneBodyCov" 25 | } 26 | forward input 27 | } 28 | -------------------------------------------------------------------------------- /modules/RNAseq/genebodycov.header: -------------------------------------------------------------------------------- 1 | geneBodyCov_vars=[ 2 | outdir: QC + "/geneBodyCov", 3 | bed : ESSENTIAL_GENESBED, // gene model 4 | format: "png", // image format 5 | extra : "" // extra parms to sent to the tool 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/genebodycov.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/RNAseq/genebodycov2.groovy: -------------------------------------------------------------------------------- 1 | geneBodyCov2 = { 2 | doc title: "geneBodyCoverage2", 3 | desc: """Calculate the RNA-seq coverage over gene body. 4 | Useful to check the 5' or 3' coverage bias""", 5 | constraints: "", 6 | bpipe_version: "tested with bpipe 0.9.9.9", 7 | author: "Sergi Sayols" 8 | 9 | output.dir = geneBodyCov2_vars.outdir 10 | def GENEBODYCOV2_FLAGS = 11 | (geneBodyCov2_vars.gtf ? " gtf=" + geneBodyCov2_vars.gtf : "" ) + 12 | (geneBodyCov2_vars.paired ? " paired=" + geneBodyCov2_vars.paired : "" ) + 13 | (geneBodyCov2_vars.stranded ? " stranded=" + geneBodyCov2_vars.stranded : "" ) + 14 | (geneBodyCov2_vars.outdir ? " outdir=" + geneBodyCov2_vars.outdir : "" ) + 15 | (geneBodyCov2_vars.threads ? " threads=" + geneBodyCov2_vars.threads : "" ) 16 | 17 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 18 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 19 | 20 | // run the chunk 21 | transform(".bam") to ("_geneBodyCov.png") { 22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | if [[ ! 
-e "$output.dir" ]]; then 27 | mkdir -p "$output.dir"; 28 | fi && 29 | 30 | Rscript ${PIPELINE_ROOT}/tools/geneBodyCov/geneBodyCov.R bam=$input $GENEBODYCOV2_FLAGS 31 | ""","geneBodyCov2" 32 | } 33 | forward input 34 | } 35 | -------------------------------------------------------------------------------- /modules/RNAseq/genebodycov2.header: -------------------------------------------------------------------------------- 1 | geneBodyCov2_vars=[ 2 | gtf : ESSENTIAL_GENESGTF, // the gencode annotation GTF (can be compressed) 3 | paired : ESSENTIAL_PAIRED, // paired end yes|no 4 | stranded: ESSENTIAL_STRANDED, // strandness yes|no|reverse 5 | outdir : QC + "/geneBodyCov", 6 | threads : Integer.toString(ESSENTIAL_THREADS) // number of cores to use 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/RNAseq/genebodycov2.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/RNAseq/inferexperiment.groovy: -------------------------------------------------------------------------------- 1 | inferexperiment = { 2 | doc title: "inferexperiment", 3 | desc: "Calculate the strand-specificity of the library", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.8.7", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = inferexperiment_vars.outdir 9 | 10 | def INFEREXPERIMENT_FLAGS = 11 | (inferexperiment_vars.bed ? " -r " + inferexperiment_vars.bed : "" ) + 12 | (inferexperiment_vars.extra ? " " + inferexperiment_vars.extra : "" ) 13 | 14 | def TOOL_ENV = prepare_tool_env("rseqc", tools["rseqc"]["version"], tools["rseqc"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | // run the chunk 18 | transform(".bam") to (input.prefix + "_inferexperiment.txt") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | infer_experiment.py -i $input $INFEREXPERIMENT_FLAGS > $output 24 | ""","inferexperiment" 25 | } 26 | 27 | forward input 28 | } 29 | 30 | -------------------------------------------------------------------------------- /modules/RNAseq/inferexperiment.header: -------------------------------------------------------------------------------- 1 | inferexperiment_vars=[ 2 | outdir: QC + "/inferexperiment", 3 | bed : ESSENTIAL_GENESBED, //this variable is essential for the module to run do not set it to the empty string! 
4 | extra : "-s 4000000" //other options can go here; in this case it is the sample size (how many reads should be sampled from the bam file) 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/RNAseq/inferexperiment.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/RNAseq/prermats.header: -------------------------------------------------------------------------------- 1 | PRERMATS_vars=[ 2 | outdir : RESULTS + "/PRERMATS", 3 | targets : "targets.txt", 4 | contrasts: "contrasts.txt", 5 | suffix : "_targets_rMATS.txt" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/prermats.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/RNAseq/qualimap.groovy: -------------------------------------------------------------------------------- 1 | qualimap = { 2 | doc title: "Qualimap", 3 | desc: "Call qualimap to run RNA-seq quality control", 4 | author: "Nastasja Kreim" 5 | 6 | output.dir = qualimap_vars.outdir 7 | // no|yes|reverse 8 | if(qualimap_vars.stranded == "no") { 9 | qualimap_vars.protocol = "non-strand-specific" 10 | } 11 | else if (qualimap_vars.stranded == "yes") { 12 | qualimap_vars.protocol = "strand-specific-forward" 13 | } 14 | else { 15 | qualimap_vars.protocol = "strand-specific-reverse" 16 | } 17 | if(qualimap_vars.paired){ 18 | qualimap_vars.extra = qualimap_vars.extra + " -pe" 19 | } 20 | 21 | def TOOL_ENV = prepare_tool_env("qualimap", tools["qualimap"]["version"], tools["qualimap"]["runenv"]) 22 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 23 | 24 | transform(".bam") to("_counts.txt") { 25 | exec """ 26 | ${TOOL_ENV} && 27 | ${PREAMBLE} && 28 | 29 | unset DISPLAY; 30 | echo $output.prefix; 31 | qualimap rnaseq -bam $input -outdir ${output.prefix}_qualimap -outformat html -gtf ${qualimap_vars.genesgtf} -oc $output -p ${qualimap_vars.protocol} ${qualimap_vars.extra} 32 | ""","qualimap" 33 | } 34 | 35 | forward input 36 | } 37 | 38 | -------------------------------------------------------------------------------- /modules/RNAseq/qualimap.header: -------------------------------------------------------------------------------- 1 | qualimap_vars=[ 2 | outdir : QC + "/qualimap", 3 | stranded: ESSENTIAL_STRANDED, // options are no/yes/reverse 4 | genesgtf: ESSENTIAL_GENESGTF, 5 | paired : (ESSENTIAL_PAIRED == "yes"), 6 | extra : "--java-mem-size=10G" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/RNAseq/qualimap.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/RNAseq/rmats.header: -------------------------------------------------------------------------------- 1 | rMATS_vars=[ 2 | outdir : RESULTS + "/rMATS", 3 | suffix : PRERMATS_vars.suffix, 4 | sep : "-", 5 | paired : (ESSENTIAL_PAIRED == "yes"), 6 | stranded : ESSENTIAL_STRANDED, 7 | gtf : ESSENTIAL_GENESGTF, 8 | length : ESSENTIAL_READLENGTH, 9 | threads : Integer.toString(ESSENTIAL_THREADS), 10 | varreadlen: true, // allow reads with lengths that differ from ESSENTIAL_READLENGTH 11 | allowclip : true, // allow alignments with soft or hard clipping to be used 12 | novelss : true, // enable detection of novel (unannotated) splice sites 13 | extra : " --cstat 0.0001" // set e.g. --paired-stats if samples are paired and a paired-stats model should be used 14 | ] 15 | 16 | maser_vars=[ 17 | gtf : ESSENTIAL_GENESGTF, // Needs to be an ensembl annotation for the plots to work correctly 18 | db : ESSENTIAL_DB, 19 | ftype : "JCEC", // tells which type of splicing events to consider: junction counts (JC) or junction-exon counts (JCEC) 20 | mincov : "5", // ignore splicing events with read coverage below this count 21 | fdr : "0.01", // FDR cut-off to select statistically significant splicing events identified by rMATS 22 | dpsi : "0.1" // minimum percentage spliced in (PSI) to include in plots 23 | ] 24 | 25 | load PIPELINE_ROOT + "/modules/RNAseq/rmats.groovy" 26 | 27 | -------------------------------------------------------------------------------- /modules/RNAseq/rnaseqc.groovy: -------------------------------------------------------------------------------- 1 | rnaseqc = { 2 | doc title: "RNA-SeQC: Basic quality control for RNA-seq", 3 | desc: "efficient RNA-seq quality control and quantification for large cohorts", 4 | constraints: "", 5 | author: "Sivarajan Karunanithi" 6 | 7 | output.dir = rnaseqc_vars.outdir 8 | def RNASEQC_FLAGS = 9 | (rnaseqc_vars.legacy ? " --legacy" : "") + 10 | (rnaseqc_vars.extra ? " " + rnaseqc_vars.extra : "") 11 | 12 | def TOOL_ENV = prepare_tool_env("rnaseqc", tools["rnaseqc"]["version"], tools["rnaseqc"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage: stageName, outdir: output.dir, input: new File(input1.prefix).getName()) 14 | 15 | //run the chunk 16 | transform(".bam") to (".bam.gene_reads.gct") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | rnaseqc ${rnaseqc_vars.gtf} $input $output.dir ${RNASEQC_FLAGS}; 22 | ""","rnaseqc" 23 | 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /modules/RNAseq/rnaseqc.header: -------------------------------------------------------------------------------- 1 | rnaseqc_vars=[ 2 | outdir: RESULTS + "/RNA-SeQC", 3 | legacy: "--legacy", 4 | gtf : ESSENTIAL_GENESGTF, 5 | extra : "-u -v" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/rnaseqc.groovy" 9 | -------------------------------------------------------------------------------- /modules/RNAseq/starfusion.groovy: -------------------------------------------------------------------------------- 1 | STAR_Fusion = { 2 | doc title: "STAR-Fusion", 3 | desc: "detection of fusion transcripts from RNA-Seq data", 4 | constraints: "tab-delimited summary file identifying the fusion pairs. Works only with PE data", 5 | bpipe_version: "tested with bpipe 0.9.9", 6 | author: "Giuseppe Petrosino" 7 | 8 | output.dir = STAR_Fusion_vars.outdir 9 | 10 | File f = new File(input1) 11 | def OUTPUTFILE = (f.getName() =~ /.R1.fastq.gz/).replaceFirst("") 12 | 13 | def STARFUSION_FLAGS = 14 | (STAR_Fusion_vars.threads ? " --CPU " + STAR_Fusion_vars.threads : "") + 15 | (STAR_Fusion_vars.genome_lib ? " --genome_lib_dir " + STAR_Fusion_vars.genome_lib : "") + 16 | (STAR_Fusion_vars.extra ? " " + STAR_Fusion_vars.extra : "") 17 | 18 | def TOOL_ENV = prepare_tool_env("starfusion", tools["starfusion"]["version"], tools["starfusion"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | produce(OUTPUTFILE + "_starfusion.done") { // change it to whatever STAR-Fusion produces, and remove the touch $output, it's useless!
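// Sketch answering the note above (not verified against the STAR-Fusion version
// pinned in config/tools.groovy): STAR-Fusion writes
// star-fusion.fusion_predictions.tsv (plus an .abridged.tsv) into --output_dir,
// so the stage could produce() that file directly, e.g.
// produce(OUTPUTFILE + "_starfusion/star-fusion.fusion_predictions.tsv"),
// which would make the .done marker and the trailing touch unnecessary.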
22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | STAR-Fusion $STARFUSION_FLAGS --tmpdir \${TMP}/\$(basename $output.prefix) --left_fq $input1 --right_fq $input2 --output_dir $output.prefix && 27 | touch $output 28 | ""","STAR_Fusion" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /modules/RNAseq/starfusion.header: -------------------------------------------------------------------------------- 1 | STAR_Fusion_vars=[ 2 | outdir : FUSION, 3 | threads : Integer.toString(ESSENTIAL_THREADS), 4 | genome_lib : ESSENTIAL_STARFUSION_LIB, 5 | extra : "" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/starfusion.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/RNAseq/stringtie.header: -------------------------------------------------------------------------------- 1 | StringTie_vars=[ 2 | outdir : RESULTS + "/stringtie", 3 | gtf : ESSENTIAL_GENESGTF, 4 | stranded: ESSENTIAL_STRANDED, 5 | threads : Integer.toString(ESSENTIAL_THREADS), 6 | extra : "-f 0.1 -B -e" 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/RNAseq/stringtie.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/RNAseq/subread.header: -------------------------------------------------------------------------------- 1 | subread_count_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | stranded: ESSENTIAL_STRANDED, //whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | paired : (ESSENTIAL_PAIRED == "yes"), //paired end design 5 | feature : "exon", //annotation feature to count mapped reads ("exon" by default) 6 | genesgtf: ESSENTIAL_GENESGTF, 7 | threads : Integer.toString(ESSENTIAL_THREADS), 8 | extra : "" //extra params to send to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/RNAseq/subread.groovy" 12 | 13 | -------------------------------------------------------------------------------- /modules/RNAseq/subread2rnatypes.header: -------------------------------------------------------------------------------- 1 | subread2rnatypes_vars=[ 2 | outdir : QC + "/RNAtypes", 3 | stranded : ESSENTIAL_STRANDED, //whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | paired : (ESSENTIAL_PAIRED == "yes"), //paired end design 5 | genesgtf : ESSENTIAL_GENESGTF, 6 | feature : "exon", // type of feature that is to be counted in 7 | accumulate: ESSENTIAL_FEATURETYPE, // type of annotation counts should be accumulated on. Usually that would be gene_id, but in this case we choose gene_biotype 8 | threads : Integer.toString(ESSENTIAL_THREADS), 9 | extra : "" // extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/RNAseq/subread2rnatypes.groovy" 13 | 14 | -------------------------------------------------------------------------------- /modules/RNAseq/tpm.groovy: -------------------------------------------------------------------------------- 1 | tpm = { 2 | doc title: "tpm", 3 | desc: "Calculate TPMs based on raw counts", 4 | constraints: "", 5 | bpipe_version: "", 6 | author: "Anke Busch" 7 | 8 | output.dir = tpm_vars.outdir 9 | def TPM_FLAGS = 10 | (tpm_vars.genesgtf ? " -g " + tpm_vars.genesgtf : "") + 11 | (tpm_vars.feature ? " -f " + tpm_vars.feature : "") + 12 | (tpm_vars.extra ? " " + tpm_vars.extra : "") 13 | 14 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 15 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 16 | 17 | // run the chunk 18 | transform(".readcounts.tsv") to (".tpm.tsv") { 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | Rscript ${PIPELINE_ROOT}/tools/TPMs/TPMs.R -c $input -o $output $TPM_FLAGS 24 | 25 | ""","tpm" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /modules/RNAseq/tpm.header: -------------------------------------------------------------------------------- 1 | tpm_vars=[ 2 | outdir : RESULTS + "/TPMs", 3 | genesgtf: ESSENTIAL_GENESGTF, 4 | feature : "exon", // considered features in genes 5 | extra : "" // extra params to send to the tool 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/RNAseq/tpm.groovy" 9 | 10 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/add_read_group.groovy: -------------------------------------------------------------------------------- 1 | AddRG = { 2 | doc title: "AddReadGroup", 3 | desc: "Adds read groups to the bam as part of the GATK pipeline", 4 | constraints: "Picard tools version >= 1.141", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = AddRG_vars.outdir 8 | 9 | File f = new File(input1) 10 | def EXP = (f.getName() =~ /.bam/).replaceFirst("") 11 | 12 | def TOOL_ENV = prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 13 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 14 | 15 | transform(".bam") to (".rg.bam"){ 16 | exec """ 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | java ${AddRG_vars.java_flags} -jar \${PICARD} AddOrReplaceReadGroups I=$input O=$output SO=coordinate RGID=${EXP} RGLB=${EXP} RGPL=illumina RGPU=genomics RGSM=${EXP} 21 | ""","AddRG" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/add_read_group.header: -------------------------------------------------------------------------------- 1 | AddRG_vars=[ 2 | outdir : STAR_pe_2nd_vars.outdir, 3 | java_flags: "-Xmx5000m" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/add_read_group.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/base_recalibration.groovy: -------------------------------------------------------------------------------- 1 | BaseRecalibration = { 2 | doc title: "GATK BaseRecalibrator", 3 | desc: "Recalibrate Base Qualities in BAM files, using GATK.", 4 | constraints: "GATK version >= 3.5", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = BaseRecalibration_vars.outdir 8 | 9 | def BaseRecalibrator_FLAGS = 10 | (BaseRecalibration_vars.vcf_ref ? " -knownSites " + BaseRecalibration_vars.vcf_ref : "" ) + 11 | (BaseRecalibration_vars.threads ? " -nct " + BaseRecalibration_vars.threads : "" ) + 12 | (BaseRecalibration_vars.genome_ref ? " -R " + BaseRecalibration_vars.genome_ref : "" ) 13 | 14 | def PrintReads_FLAGS = 15 | (BaseRecalibration_vars.threads ? " -nct " + BaseRecalibration_vars.threads : "" ) + 16 | (BaseRecalibration_vars.genome_ref ? " -R " + BaseRecalibration_vars.genome_ref : "" ) 17 | 18 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 19 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 20 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 21 | 22 | transform (".bam") to (".recalibration.table", ".recalibrated.bam"){ 23 | exec """ 24 | ${TOOL_ENV} && 25 | ${PREAMBLE} && 26 | 27 | java ${BaseRecalibration_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T BaseRecalibrator $BaseRecalibrator_FLAGS -I $input -o $output1 && 28 | java ${BaseRecalibration_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T PrintReads $PrintReads_FLAGS -I $input -BQSR $output1 -o $output2 29 | ""","BaseRecalibration" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/base_recalibration.header: -------------------------------------------------------------------------------- 1 | BaseRecalibration_vars=[ 2 | outdir : STAR_pe_2nd_vars.outdir, 3 | java_flags: "-Xmx2400m", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | genome_ref: ESSENTIAL_GENOME_REF, 6 | vcf_ref : ESSENTIAL_VCF_REF 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/base_recalibration.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/create_star_index_sjdb.header: -------------------------------------------------------------------------------- 1 | GenerateStarIndexFromSJ_vars=[ 2 | outdir : MAPPED + "/sjdbStarIndex", 3 | sjdbfile : MAPPED + "/sjdbStarIndex/SJ.out.tab.Pass1.sjdb", 4 | outdir_2nd_index: FilterAndMergeSJtab_vars.outdir, 5 | threads : Integer.toString(ESSENTIAL_THREADS), 6 | genome_ref: ESSENTIAL_GENOME_REF, 7 | maxram : STAR_pe_vars.maxram, 8 | bufsize : STAR_pe_vars.bufsize, 9 | overhang : STAR_pe_vars.overhang, 10 | extra : "" 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/create_star_index_sjdb.groovy" 14 | 15 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/mark_dups.groovy: -------------------------------------------------------------------------------- 1 | MarkDups = { 2 | doc title: "MarkDups", 3 | desc: "Call picard tools to mark, and optionally remove, duplicated reads in a bam file", 4 | constraints: "Picard tools version >= 1.141", 5 | author: "Sergi Sayols, modified by Antonio Domingues" 6 | 7 | output.dir = MarkDups_vars.outdir 8 | def MarkDups_FLAGS = 9 | " REMOVE_DUPLICATES=" + (MarkDups_vars.remove_dups ? "TRUE" : "FALSE") + 10 | " CREATE_INDEX=" + (MarkDups_vars.index ? "TRUE" : "FALSE") + 11 | " ASSUME_SORTED=" + (MarkDups_vars.assume_sorted ? "TRUE" : "FALSE") + 12 | (MarkDups_vars.validation ? " VALIDATION_STRINGENCY=" + MarkDups_vars.validation : "") + 13 | (MarkDups_vars.extra ? " " + MarkDups_vars.extra : "") 14 | 15 | def TOOL_ENV = prepare_tool_env("picard", tools["picard"]["version"], tools["picard"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | transform(".rg.bam") to (".rg.duprm.bam"){ 19 | exec """ 20 | ${TOOL_ENV} && 21 | ${PREAMBLE} && 22 | 23 | java ${MarkDups_vars.java_flags} -jar \${PICARD} MarkDuplicates $MarkDups_FLAGS INPUT=$input OUTPUT=$output METRICS_FILE=${input.prefix}_dupmetrics.tsv 24 | ""","MarkDups" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/mark_dups.header: -------------------------------------------------------------------------------- 1 | MarkDups_vars=[ 2 | outdir : MAPPED, 3 | java_flags : "-Xmx5000m", 4 | remove_dups : false, 5 | index : true, 6 | assume_sorted: true, 7 | validation : "SILENT", 8 | extra : "" 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/mark_dups.groovy" 12 | 13 | // expected parameter types 14 | class MarkDups_vars_schema { 15 | String outdir 16 | String java_flags 17 | Boolean remove_dups 18 | Boolean index 19 | Boolean assume_sorted 20 | String validation 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir 26 | } 27 | } 28 | 29 | validate_schema(MarkDups_vars_schema, MarkDups_vars) 30 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/merge_SJ_tab.groovy: -------------------------------------------------------------------------------- 1 | FilterAndMergeSJtab = { 2 | doc title: "FilterAndMergeSJtab", 3 | desc: "GATK variant calling suggests 2-step STAR mapping for RNA-seq. In this step all splice junction files are collected, filtered and merged. Based on https://code.google.com/p/rna-star/issues/detail?id=7", 4 | constraints: "STAR STAR_2.4.2a", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = FilterAndMergeSJtab_vars.outdir 8 | 9 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 10 | 11 | produce("SJ.out.tab.Pass1.sjdb"){ 12 | exec """ 13 | ${PREAMBLE} && 14 | 15 | cat $inputs | awk 'BEGIN {OFS="\t"; strChar[0]="."; strChar[1]="+"; strChar[2]="-";} {if(\$5>0){print \$1,\$2,\$3,strChar[\$4]}}' > $output 16 | ""","FilterAndMergeSJtab" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/merge_SJ_tab.header: -------------------------------------------------------------------------------- 1 | FilterAndMergeSJtab_vars=[ 2 | outdir: MAPPED + "/sjdbStarIndex" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/merge_SJ_tab.groovy" 6 |
" -R " + SplitNCigarReads_vars.gatk_ref : "") + 11 | (SplitNCigarReads_vars.read_filter_flag ? " -rf " + SplitNCigarReads_vars.read_filter_flag : "") + 12 | (SplitNCigarReads_vars.map_q_from_flag ? " -RMQF " + SplitNCigarReads_vars.map_q_from_flag : "") + 13 | (SplitNCigarReads_vars.map_q_to_flag ? " -RMQT " + SplitNCigarReads_vars.map_q_to_flag : "") + 14 | (SplitNCigarReads_vars.unsafe_flag ? " -U " + SplitNCigarReads_vars.unsafe_flag : "") 15 | 16 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 17 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 18 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 19 | 20 | transform (".duprm.bam") to (".duprm.split.bam"){ 21 | exec """ 22 | ${TOOL_ENV} && 23 | ${PREAMBLE} && 24 | 25 | java ${VariantCallHC_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T SplitNCigarReads $SplitNCigarReads_FLAGS -I $input -o $output 26 | ""","SplitNCigarReads" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/splitNcigar.header: -------------------------------------------------------------------------------- 1 | SplitNCigarReads_vars=[ 2 | outdir : STAR_pe_2nd_vars.outdir, 3 | gatk_ref : ESSENTIAL_GENOME_REF, 4 | gatk_threads : Integer.toString(ESSENTIAL_THREADS), 5 | java_flags : "2400m", 6 | read_filter_flag: "ReassignOneMappingQuality", 7 | map_q_from_flag : 255, 8 | map_q_to_flag : 60, 9 | unsafe_flag : "ALLOW_N_CIGAR_READS" 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/splitNcigar.groovy" 13 | 14 | // expected parameter types 15 | class SplitNCigarReads_vars_schema { 16 | String outdir 17 | String gatk_ref 18 | String gatk_threads 19 | String java_flags 20 | String read_filter_flag 21 | Integer map_q_from_flag 22 | Integer map_q_to_flag 23 | String unsafe_flag 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && gatk_ref && map_q_from_flag >= 0 && map_q_to_flag >= 0 28 | } 29 | } 30 | 31 | validate_schema(SplitNCigarReads_vars_schema, SplitNCigarReads_vars) 32 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/star1pass.header: -------------------------------------------------------------------------------- 1 | STAR_pe_vars=[ 2 | outdir : MAPPED + "/1stPass", 3 | logdir : LOGS + "/STAR_1stPass", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | ref : ESSENTIAL_STAR_REF, 6 | maxram : "31000000000", // around 30Gb for mammals 7 | bufsize : "150000000", // buffer size 8 | mm : "2", // number of mismatches allowed 9 | multimap : "10", // max multimap positions per read 10 | minintro : "21", // minimum intron size 11 | overhang : Integer.toString(ESSENTIAL_READLENGTH - 1), 12 | extra : "" 13 | ] 14 | 15 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/star1pass.groovy" 16 | 17 | // expected parameter types 18 | class STAR_pe_vars_schema { 19 | String outdir 20 | String logdir 21 | String threads 22 | String ref 23 | String maxram 24 | String bufsize 25 | String mm 26 | String multimap 27 | String minintro 28 | String overhang 29 | String extra 30 | 31 | // check for the presence of mandatory params 32 | boolean asBoolean() { 33 | outdir && ref 34 | } 35 | } 36 | 37 | validate_schema(STAR_pe_vars_schema, STAR_pe_vars) 38 | -------------------------------------------------------------------------------- 
/modules/RNAseqVariantCalling/star2pass.header: -------------------------------------------------------------------------------- 1 | STAR_pe_2nd_vars=[ 2 | outdir : MAPPED + "/2ndPass", 3 | logdir : LOGS + "/STAR_2ndPass", 4 | threads : Integer.toString(ESSENTIAL_THREADS), 5 | ref : FilterAndMergeSJtab_vars.outdir, 6 | maxram : "31000000000", // around 30Gb for mammals 7 | bufsize : "150000000", // buffer size 8 | mm : "2", // number of mismatches allowed 9 | multimap : "10", // max multimap positions per read 10 | minintro : "21", // minimum intron size 11 | filter_sec: true, // filter out secondary alignments from the bam file? 12 | samtools_threads: Integer.toString(ESSENTIAL_THREADS), 13 | overhang : Integer.toString(ESSENTIAL_READLENGTH - 1), 14 | extra : "" 15 | ] 16 | 17 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/star2pass.groovy" 18 | 19 | // expected parameter types 20 | class STAR_pe_2nd_vars_schema { 21 | String outdir 22 | String logdir 23 | String threads 24 | String ref 25 | String maxram 26 | String bufsize 27 | String mm 28 | String multimap 29 | String minintro 30 | Boolean filter_sec 31 | String samtools_threads 32 | String overhang 33 | String extra 34 | 35 | // check for the presence of mandatory params 36 | boolean asBoolean() { 37 | outdir && ref 38 | } 39 | } 40 | 41 | validate_schema(STAR_pe_2nd_vars_schema, STAR_pe_2nd_vars) 42 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/variantCall_HC.groovy: -------------------------------------------------------------------------------- 1 | VariantCallHC = { 2 | doc title: "GATK HaplotypeCaller", 3 | desc: "Call variants using GATK HaplotypeCaller.", 4 | constraints: "GATK version >= 3.5", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = VariantCallHC_vars.outdir 8 | 9 | def HaplotypeCaller_FLAGS = 10 | " -dontUseSoftClippedBases" + 11 | (VariantCallHC_vars.threads ? " -nct " + VariantCallHC_vars.threads : "" ) + 12 | (VariantCallHC_vars.gatk_ref ? " -R " + VariantCallHC_vars.gatk_ref : "" ) + 13 | (VariantCallHC_vars.vcf_ref ? " --dbsnp " + VariantCallHC_vars.vcf_ref : "" ) + 14 | (VariantCallHC_vars.min_score_call ? " -stand_call_conf " + VariantCallHC_vars.min_score_call : "") + 15 | (VariantCallHC_vars.min_score_emit ?
" -stand_emit_conf " + VariantCallHC_vars.min_score_emit : "") 16 | 17 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 18 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | transform (".rg.duprm.split.recalibrated.bam") to (".HC.vcf.gz") { 22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | java ${VariantCallHC_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T HaplotypeCaller $HaplotypeCaller_FLAGS -I $input -o $output 27 | ""","VariantCallHC" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/variantCall_HC.header: -------------------------------------------------------------------------------- 1 | VariantCallHC_vars=[ 2 | outdir : RESULTS + "/HC", 3 | java_flags : "-Xmx2400m", 4 | gatk_ref : ESSENTIAL_GENOME_REF, 5 | vcf_ref : ESSENTIAL_VCF_REF, 6 | threads : Integer.toString(ESSENTIAL_THREADS), 7 | min_score_call: 20, 8 | min_score_emit: 20 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/variantCall_HC.groovy" 12 | 13 | // expected parameter types 14 | class VariantCallHC_vars_schema { 15 | String outdir 16 | String java_flags 17 | String gatk_ref 18 | String vcf_ref 19 | String threads 20 | Integer min_score_call 21 | Integer min_score_emit 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && min_score_call >= 0 && min_score_emit >= 0 26 | } 27 | } 28 | 29 | validate_schema(VariantCallHC_vars_schema, VariantCallHC_vars) 30 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/variant_filtration.groovy: -------------------------------------------------------------------------------- 1 | VariantFiltration = { 2 | doc title: "GATK HaplotypeCaller", 3 | desc: "Filter variants following bast practices:http://gatkforums.broadinstitute.org/gatk/discussion/3891/calling-variants-in-rnaseq. Note that values are hardcoded.", 4 | constraints: "GATK version >= 3.5", 5 | author: "Antonio Domingues" 6 | 7 | output.dir = VariantFiltration_vars.outdir 8 | 9 | def VariantFiltration_FLAGS = 10 | " -window 35" + 11 | " -cluster 3" + 12 | " -filterName FS" + 13 | " -filter \"FS > 30.0\"" + 14 | " -filterName QD" + 15 | " -filter \"QD < 2.0\"" + 16 | (VariantFiltration_vars.ref ? 
" -R " + VariantFiltration_vars.ref : "") 17 | 18 | def TOOL_ENV = prepare_tool_env("java", tools["java"]["version"], tools["java"]["runenv"]) + " && " + 19 | prepare_tool_env("gatk", tools["gatk"]["version"], tools["gatk"]["runenv"]) 20 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 21 | 22 | transform (".vcf.gz") to (".filtered.vcf.gz") { 23 | exec """ 24 | ${TOOL_ENV} && 25 | ${PREAMBLE} && 26 | 27 | java ${VariantFiltration_vars.java_flags} -Djava.io.tmpdir=\${TMP} -jar \${gatk} -T VariantFiltration -V $input -o $output $VariantFiltration_FLAGS 28 | ""","VariantFiltration" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /modules/RNAseqVariantCalling/variant_filtration.header: -------------------------------------------------------------------------------- 1 | VariantFiltration_vars=[ 2 | outdir : RESULTS + "/HC", 3 | java_flags: "-Xmx2400m", 4 | ref : ESSENTIAL_GENOME_REF 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/variant_filtration.groovy" 8 | 9 | -------------------------------------------------------------------------------- /modules/breaktag/bwa.groovy: -------------------------------------------------------------------------------- 1 | bwa = { 2 | doc title: "BWA SR/PE alignment", 3 | desc: "Align SR/PE reads with BWA", 4 | constraints: "none", 5 | author: "Sergi Sayols" 6 | 7 | output.dir = BWA_vars.outdir 8 | 9 | def File f = new File(input1) 10 | def OUTFILE = (f.getName() =~ /(.R1)*.filt.fastq.gz/).replaceFirst(".bam") 11 | 12 | def BWA_INPUT = (BWA_vars.paired ? "$input1 $input2" : "$input") 13 | def BWA_FLAGS = 14 | (BWA_vars.threads ? " -t " + BWA_vars.threads : "" ) + 15 | (BWA_vars.extra ? " " + BWA_vars.extra : "" ) 16 | 17 | def SAMTOOLS_VIEW_FLAGS = "-bhSu" + 18 | (BWA_vars.minqual ? " -q " + BWA_vars.minqual : "") 19 | def SAMTOOLS_SORT_FLAGS = 20 | (BWA_vars.samtools_threads ? 
" -@ " + BWA_vars.samtools_threads : "" ) 21 | 22 | def TOOL_ENV = prepare_tool_env("bwa", tools["bwa"]["version"], tools["bwa"]["runenv"]) + " && " + 23 | prepare_tool_env("samtools", tools["samtools"]["version"], tools["samtools"]["runenv"]) 24 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 25 | 26 | produce(OUTFILE) { 27 | exec """ 28 | ${TOOL_ENV} && 29 | ${PREAMBLE} && 30 | 31 | bwa mem $BWA_FLAGS $BWA_vars.ref $BWA_INPUT | \ 32 | samtools view $SAMTOOLS_VIEW_FLAGS - | \ 33 | samtools sort $SAMTOOLS_SORT_FLAGS -T \${TMP}/${OUTFILE}_sort - > ${output} && 34 | 35 | samtools index ${output} 36 | ""","BWA_pe" 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /modules/breaktag/bwa.header: -------------------------------------------------------------------------------- 1 | BWA_vars=[ 2 | outdir : MAPPED, 3 | paired : RUN_IN_PAIRED_END_MODE, 4 | ref : ESSENTIAL_BWA_REF, 5 | threads : Integer.toString(ESSENTIAL_THREADS), 6 | minqual : Integer.toString(ESSENTIAL_QUALITY), 7 | samtools_threads: Integer.toString(ESSENTIAL_THREADS), 8 | extra : "-v 1" // output only errors to stderr 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/breaktag/bwa.groovy" 12 | 13 | // expected parameter types 14 | class BWA_vars_schema { 15 | String outdir 16 | Boolean paired 17 | String ref 18 | String threads 19 | String minqual 20 | String samtools_threads 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && ref && minqual.toInteger() >= 0 26 | } 27 | } 28 | 29 | validate_schema(BWA_vars_schema, BWA_vars) 30 | -------------------------------------------------------------------------------- /modules/breaktag/collect_stats.groovy: -------------------------------------------------------------------------------- 1 | collect_stats = { 2 | doc title: "collect stats", 3 | desc: "collect breaktag DSB stats", 4 | constraints: "none", 5 | author: "Sergi Sayols" 6 | 7 | output.dir = collect_stats_vars.outdir 8 | 9 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 10 | 11 | transform(".strandless.bed.gz") to(".txt") { 12 | exec """ 13 | ${PREAMBLE} && 14 | zcat $input | awk '{i+=\$5} END {print "breaks: ", i; print "loci: ", NR;}' > $output 15 | """ 16 | } 17 | forward input 18 | } 19 | 20 | -------------------------------------------------------------------------------- /modules/breaktag/collect_stats.header: -------------------------------------------------------------------------------- 1 | collect_stats_vars=[ 2 | outdir: RESULTS + "/stats" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/breaktag/collect_stats.groovy" 6 | 7 | // expected parameter types 8 | class collect_stats_vars_schema { 9 | String outdir 10 | 11 | // check for the presence of mandatory params 12 | boolean asBoolean() { 13 | outdir 14 | } 15 | } 16 | 17 | validate_schema(collect_stats_vars_schema, collect_stats_vars) 18 | -------------------------------------------------------------------------------- /modules/breaktag/count_breaks.header: -------------------------------------------------------------------------------- 1 | count_breaks_vars=[ 2 | outdir : RESULTS + "/counts", 3 | paired : RUN_IN_PAIRED_END_MODE, 4 | threads : Integer.toString(ESSENTIAL_THREADS) 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/breaktag/count_breaks.groovy" 8 | 9 | // expected parameter types 10 | class count_breaks_vars_schema { 11 | String outdir 
12 | Boolean paired 13 | String threads 14 | 15 | // check for the presence of mandatory params 16 | boolean asBoolean() { 17 | outdir 18 | } 19 | } 20 | 21 | validate_schema(count_breaks_vars_schema, count_breaks_vars) 22 | -------------------------------------------------------------------------------- /modules/breaktag/count_breaks_strandless.groovy: -------------------------------------------------------------------------------- 1 | count_breaks_strandless = { 2 | doc title: "Count breaks per position", 3 | desc: "Count breaks regardless of the strand to which the reads assign them", 4 | constraints: "Expects perl to be installed", 5 | author: "Sergi Sayols" 6 | 7 | output.dir = count_breaks_strandless_vars.outdir 8 | 9 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 10 | 11 | transform(".bed.gz") to(".strandless.bed.gz") { 12 | exec """ 13 | ${PREAMBLE} && 14 | 15 | zcat $input | \ 16 | perl -aln -e 'if(\$F[0]==\$F0[0] && \$F[1]==\$F0[1] && \$F[2]==\$F0[2]){ \$F0[4]+=\$F[4]; } else { \$F0[5]="*"; print join("\t", @F0); @F0=@F; } END{ \$F[5]="*"; print join("\t", @F) }' | \ 17 | tail -n +2 | \ 18 | gzip -c > $output 19 | ""","count_breaks_strandless" 20 | } 21 | } 22 | 23 | 24 | -------------------------------------------------------------------------------- /modules/breaktag/count_breaks_strandless.header: -------------------------------------------------------------------------------- 1 | count_breaks_strandless_vars=[ 2 | outdir: RESULTS + "/counts" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/breaktag/count_breaks_strandless.groovy" 6 | 7 | // expected parameter types 8 | class count_breaks_strandless_vars_schema { 9 | String outdir 10 | 11 | // check for the presence of mandatory params 12 | boolean asBoolean() { 13 | outdir 14 | } 15 | } 16 | 17 | validate_schema(count_breaks_strandless_vars_schema, count_breaks_strandless_vars) 18 | -------------------------------------------------------------------------------- /modules/breaktag/pattern_filtering.header: -------------------------------------------------------------------------------- 1 | pattern_filtering_vars=[ 2 | outdir : RAWDATA + "/filt", 3 | paired : RUN_IN_PAIRED_END_MODE, 4 | targets: TARGETS 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/breaktag/pattern_filtering.groovy" 8 | 9 | // expected parameter types 10 | class pattern_filtering_vars_schema { 11 | String outdir 12 | Boolean paired 13 | String targets 14 | 15 | // check for the presence of mandatory params 16 | boolean asBoolean() { 17 | outdir && targets 18 | } 19 | } 20 | 21 | validate_schema(pattern_filtering_vars_schema, pattern_filtering_vars) 22 | -------------------------------------------------------------------------------- /modules/miscellaneous/collect_tool_versions.groovy: -------------------------------------------------------------------------------- 1 | collectToolVersions = { 2 | doc title: "collectToolVersions", 3 | desc: "so far, a dumb dump of the `tools` map", 4 | constraints: "needs the tool map defined in PIPELINE_ROOT/pipelines//tools.groovy", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Sergi Sayols" 7 | 8 | output.dir = collectToolVersions_vars.outdir 9 | 10 | produce("tool_versions.txt") { 11 | File f = new File(collectToolVersions_vars.outdir + "/tool_versions.txt") 12 | f.write "tool\tenv\tversion\n" 13 | tools.each { tool, x -> f << "$tool\t$x.runenv\t$x.version\n" } 14 | } 15 | } 16 | 17 |
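For reference, a minimal sketch of the `tools` map shape that collectToolVersions consumes. The entry names and values below are illustrative assumptions; the real map comes from the pipeline's tools.groovy config, but the per-entry `version`/`runenv` keys are implied by the `prepare_tool_env(name, tools[name]["version"], tools[name]["runenv"])` calls used throughout the modules.

    // hypothetical example of the `tools` map (actual content comes from config/tools.groovy)
    tools = [
        samtools: [version: "1.9", runenv: "conda"],
        gatk    : [version: "3.5", runenv: "conda"]
    ]
    // with this map, collectToolVersions writes a tool_versions.txt like:
    // tool        env     version
    // samtools    conda   1.9
    // gatk        conda   3.5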
-------------------------------------------------------------------------------- /modules/miscellaneous/collect_tool_versions.header: -------------------------------------------------------------------------------- 1 | collectToolVersions_vars=[ 2 | outdir: LOGS 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/miscellaneous/collect_tool_versions.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/scRNAseq/CRmotifCounts.header: -------------------------------------------------------------------------------- 1 | CRmotifCounts_vars=[ 2 | outdir : RESULTS + "/CRmotifCounts", 3 | project : PROJECT, 4 | res : RESULTS, 5 | cellranger_aggr_id : (binding.variables.containsKey("cellranger_aggr_vars") ? cellranger_aggr_vars.id : (binding.variables.containsKey("cellrangeratac_aggr_vars") ? cellrangeratac_aggr_vars.id : (binding.variables.containsKey("cellrangerarc_aggr_vars") ? cellrangerarc_aggr_vars.id : "aggr"))), // folder name for results from cellranger / cellranger-atac / cellranger-arc aggr 6 | extra : "" //extra params to send to the tool 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/scRNAseq/CRmotifCounts.groovy" 10 | 11 | // expected parameter types 12 | class CRmotifCounts_vars_schema { 13 | String outdir 14 | String project 15 | String res 16 | String cellranger_aggr_id 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir 22 | } 23 | } 24 | 25 | validate_schema(CRmotifCounts_vars_schema, CRmotifCounts_vars) 26 | -------------------------------------------------------------------------------- /modules/scRNAseq/CTannoMarker.header: -------------------------------------------------------------------------------- 1 | CTannoMarker_vars=[ 2 | outdir : RESULTS + "/CTanno", 3 | project : PROJECT, 4 | res : RESULTS, 5 | assay : (RUN_BATCHCORRECT ? "integrated" : "SCT"), 6 | clusterVar : "clusters_wnn", 7 | dbfile : "https://raw.githubusercontent.com/IanevskiAleksandr/sc-type/master/ScTypeDB_full.xlsx", // file should contain five columns (tissueType - tissue type, cellName - cell type, geneSymbolmore1 - positive marker genes, geneSymbolmore2 - marker genes not expected to be expressed by a cell type, shortName - short form of cell type) 8 | tissue : "Brain", // e.g. Immune system, Pancreas, Liver, Eye, Kidney, Brain, Lung, Adrenal, Heart, Intestine, Muscle, Placenta, Spleen, Stomach, Thymus 9 | ctcolumn : "shortName", // Can be any column-name available in the dbfile, which should contain the celltype names in full form or as an abbreviation.
10 | extra : "" //extra parms to sent to the tool 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/scRNAseq/CTannoMarker.groovy" 14 | 15 | // expected parameter types 16 | class CTannoMarker_vars_schema { 17 | String outdir 18 | String project 19 | String res 20 | String assay 21 | String clusterVar 22 | String dbfile 23 | String tissue 24 | String ctcolumn 25 | String extra 26 | 27 | // check for the presence of mandatory params 28 | boolean asBoolean() { 29 | outdir 30 | } 31 | } 32 | 33 | validate_schema(CTannoMarker_vars_schema, CTannoMarker_vars) 34 | -------------------------------------------------------------------------------- /modules/scRNAseq/DNAaccess.header: -------------------------------------------------------------------------------- 1 | DNAaccess_vars=[ 2 | outdir : RESULTS + "/DNAaccess", 3 | project : PROJECT, 4 | res : RESULTS, 5 | featureCutoff : "q5", // either percentile specified as 'q' followed by the minimum percentile or minimum number of cells containing the feature 6 | skipFirstLSIcomp : "1", // If we see a very strong correlation between the first LSI component and read depth, this component should be removed. 7 | extra : "" //extra parms to sent to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/DNAaccess.groovy" 11 | 12 | // expected parameter types 13 | class DNAaccess_vars_schema { 14 | String outdir 15 | String project 16 | String res 17 | String featureCutoff 18 | String skipFirstLSIcomp 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir && project && res 24 | } 25 | } 26 | 27 | validate_schema(DNAaccess_vars_schema, DNAaccess_vars) 28 | 29 | -------------------------------------------------------------------------------- /modules/scRNAseq/SCTransform.groovy: -------------------------------------------------------------------------------- 1 | SCTransform = { 2 | doc title: "SCTransform", 3 | desc: "normalize gene expression data in Seurat object", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Frank Rühle" 7 | 8 | output.dir = SCTransform_vars.outdir 9 | 10 | def SCTransform_FLAGS = 11 | (SCTransform_vars.outdir ? " outdir=" + SCTransform_vars.outdir : "") + 12 | (SCTransform_vars.project ? " project=" + SCTransform_vars.project : "") + 13 | (SCTransform_vars.res ? " res=" + SCTransform_vars.res : "") + 14 | (SCTransform_vars.extra ? " " + SCTransform_vars.extra : "") 15 | 16 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 17 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 18 | 19 | // The SCTransform module is not using any of its inputs, but needs to check their 20 | // time stamp in order to know, if SCTransform should run (in case of pre-existing 21 | // results). This can be done by outputting/echo'ing all inputs. 
In order not to 22 | // confuse the pipeline user, this output is written to /dev/null 23 | // --- THE echo COMMAND BELOW MUST NOT BE REMOVED --- 24 | 25 | // run the chunk 26 | produce("SCTransform.RData") { 27 | exec """ 28 | ${TOOL_ENV} && 29 | ${PREAMBLE} && 30 | echo $inputs > /dev/null && 31 | 32 | Rscript ${PIPELINE_ROOT}/tools/sc_norm/SCTransform.R $SCTransform_FLAGS 33 | ""","SCTransform" 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /modules/scRNAseq/SCTransform.header: -------------------------------------------------------------------------------- 1 | SCTransform_vars=[ 2 | outdir : RESULTS + "/SCTransform", 3 | project : PROJECT, 4 | res : RESULTS, 5 | extra : "" //extra params to send to the tool 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/scRNAseq/SCTransform.groovy" 9 | 10 | // expected parameter types 11 | class SCTransform_vars_schema { 12 | String outdir 13 | String project 14 | String res 15 | String extra 16 | 17 | // check for the presence of mandatory params 18 | boolean asBoolean() { 19 | outdir && project && res 20 | } 21 | } 22 | 23 | validate_schema(SCTransform_vars_schema, SCTransform_vars) 24 | -------------------------------------------------------------------------------- /modules/scRNAseq/addumibarcodetofastq.groovy: -------------------------------------------------------------------------------- 1 | AddUMIBarcodeToFastq = { 2 | doc title: "Adds UMI and Barcode to the fastq header", 3 | desc: "adds UMI and barcode of the second read in MARS-Seq samples to the fastq header using umitools", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.2", 6 | author: "Nastasja Kreim, Frank Rühle" 7 | 8 | output.dir = AddUMIBarcodeToFastq_vars.outdir 9 | 10 | def File f = new File(input1) 11 | def OUTPUTFILE = (f.getName() =~ /(.R1)*.fastq.gz/).replaceFirst("") 12 | 13 | def umi_tools_FLAGS = 14 | (AddUMIBarcodeToFastq_vars.bcpattern ? " --bc-pattern=" + AddUMIBarcodeToFastq_vars.bcpattern : "") + 15 | (AddUMIBarcodeToFastq_vars.barcodelist ? " --whitelist=" + AddUMIBarcodeToFastq_vars.barcodelist + " --filter-cell-barcode" : "") + 16 | (AddUMIBarcodeToFastq_vars.extra ? " " + AddUMIBarcodeToFastq_vars.extra : "") 17 | 18 | def TOOL_ENV = prepare_tool_env("umitools", tools["umitools"]["version"], tools["umitools"]["runenv"]) 19 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 20 | 21 | produce(OUTPUTFILE + ".umibarcode.fastq.gz"){ 22 | exec """ 23 | ${TOOL_ENV} && 24 | ${PREAMBLE} && 25 | 26 | umi_tools extract $umi_tools_FLAGS -I $input2 --stdout \${TMP}/\$(basename ${input2.prefix}).barcode.fastq.gz --read2-in $input1 --read2-out=\${TMP}/\$(basename ${OUTPUTFILE}).umibarcode.fastq.gz && 27 | 28 | rm \${TMP}/\$(basename ${input2.prefix}).barcode.fastq.gz && 29 | mv \${TMP}/\$(basename ${OUTPUTFILE}).umibarcode.fastq.gz $output 30 | ""","AddUMIBarcodeToFastq" 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /modules/scRNAseq/addumibarcodetofastq.header: -------------------------------------------------------------------------------- 1 | AddUMIBarcodeToFastq_vars=[ 2 | outdir : PROJECT + "/rawdata_processed", 3 | bcpattern : ESSENTIAL_BCPATTERN, // pattern of the umi and the barcode in the second read.
The Cs are the barcode bases, the Ns are the UMI bases 4 | barcodelist: ESSENTIAL_WHITELIST, // list of valid barcodes 5 | extra : "" 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/scRNAseq/addumibarcodetofastq.groovy" 9 | 10 | // expected parameter types 11 | class AddUMIBarcodeToFastq_vars_schema { 12 | String outdir 13 | String bcpattern 14 | String barcodelist 15 | String extra 16 | 17 | // check for the presence of mandatory params 18 | boolean asBoolean() { 19 | outdir && bcpattern 20 | } 21 | } 22 | 23 | validate_schema(AddUMIBarcodeToFastq_vars_schema, AddUMIBarcodeToFastq_vars) 24 | -------------------------------------------------------------------------------- /modules/scRNAseq/assignSouporcellCluster.header: -------------------------------------------------------------------------------- 1 | assignSouporcellCluster_vars=[ 2 | outdir : RESULTS + "/demux_gt" + "/assignSouporcellCluster", 3 | souporcelldir : RESULTS + "/demux_gt", 4 | targets : "targets.txt", //targets file 5 | extra : "" // extra parameter 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/scRNAseq/assignSouporcellCluster.groovy" 9 | 10 | // expected parameter types 11 | class assignSouporcellCluster_vars_schema { 12 | String outdir 13 | String souporcelldir 14 | String targets 15 | String extra 16 | 17 | // check for the presence of mandatory params 18 | boolean asBoolean() { 19 | outdir && souporcelldir && targets 20 | } 21 | } 22 | 23 | validate_schema(assignSouporcellCluster_vars_schema, assignSouporcellCluster_vars) 24 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellranger_aggr.header: -------------------------------------------------------------------------------- 1 | cellranger_aggr_vars=[ 2 | outdir : RESULTS, 3 | id : "aggr", 4 | normalize : "mapped", // "none" or "mapped" (default). Equalize the average mapped read depth per cell between GEM wells by sub-sampling before merging.
5 | cores : "8", 6 | localmem : "64", 7 | extra : "" // extra params to send to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/cellranger_aggr.groovy" 11 | 12 | // expected parameter types 13 | class cellranger_aggr_vars_schema { 14 | String outdir 15 | String id 16 | String normalize 17 | String cores 18 | String localmem 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir 24 | } 25 | } 26 | 27 | validate_schema(cellranger_aggr_vars_schema, cellranger_aggr_vars) 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellranger_count.header: -------------------------------------------------------------------------------- 1 | cellranger_count_vars=[ 2 | outdir : MAPPED, 3 | transcriptome : ESSENTIAL_TENX_TRANSCRIPTOME, 4 | expect_cells : ESSENTIAL_TENX_EXPECTED_CELLS, 5 | nuclei : (ESSENTIAL_TENX_NUCLEI == "yes"), 6 | cores : "8", 7 | localmem : "64", 8 | extra : " --chemistry=SC3Pv3" // extra params to send to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/cellranger_count.groovy" 12 | 13 | // expected parameter types 14 | class cellranger_count_vars_schema { 15 | String outdir 16 | String transcriptome 17 | Integer expect_cells 18 | Boolean nuclei 19 | String cores 20 | String localmem 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && transcriptome 26 | } 27 | } 28 | 29 | validate_schema(cellranger_count_vars_schema, cellranger_count_vars) 30 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellrangerarc_aggr.header: -------------------------------------------------------------------------------- 1 | cellrangerarc_aggr_vars=[ 2 | outdir : RESULTS, 3 | reference : ESSENTIAL_TENX_REFERENCE, 4 | id : "aggr", 5 | normalize : "depth", // "none" or "depth" (default). Equalize the average mapped read depth per cell between GEM wells by sub-sampling before merging.
6 | cores : "8", 7 | localmem : "64", 8 | extra : "" // extra params to send to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangerarc_aggr.groovy" 12 | 13 | // expected parameter types 14 | class cellrangerarc_aggr_vars_schema { 15 | String outdir 16 | String reference 17 | String id 18 | String normalize 19 | String cores 20 | String localmem 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && reference 26 | } 27 | } 28 | 29 | validate_schema(cellrangerarc_aggr_vars_schema, cellrangerarc_aggr_vars) 30 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellrangerarc_count.header: -------------------------------------------------------------------------------- 1 | cellrangerarc_count_vars=[ 2 | outdir : MAPPED, 3 | reference : ESSENTIAL_TENX_REFERENCE, 4 | fastqdir : ESSENTIAL_TENX_FASTQDIR, 5 | cores : "32", 6 | localmem : "128", 7 | extra : "" // extra params to send to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangerarc_count.groovy" 11 | 12 | // expected parameter types 13 | class cellrangerarc_count_vars_schema { 14 | String outdir 15 | String reference 16 | String fastqdir 17 | String cores 18 | String localmem 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir && reference && fastqdir 24 | } 25 | } 26 | 27 | validate_schema(cellrangerarc_count_vars_schema, cellrangerarc_count_vars) 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellrangeratac_aggr.header: -------------------------------------------------------------------------------- 1 | cellrangeratac_aggr_vars=[ 2 | outdir : RESULTS, 3 | id : "aggr", 4 | reference : ESSENTIAL_TENX_REFERENCE, 5 | normalize : "depth", // "none" or "depth" (default). Equalize the average mapped read depth per cell between GEM wells by sub-sampling before merging.
6 | cores : "8", 7 | localmem : "64", 8 | extra : "" // extra params to send to the tool 9 | ] 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangeratac_aggr.groovy" 12 | 13 | // expected parameter types 14 | class cellrangeratac_aggr_vars_schema { 15 | String outdir 16 | String id 17 | String reference 18 | String normalize 19 | String cores 20 | String localmem 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir && reference 26 | } 27 | } 28 | 29 | validate_schema(cellrangeratac_aggr_vars_schema, cellrangeratac_aggr_vars) 30 | -------------------------------------------------------------------------------- /modules/scRNAseq/cellrangeratac_count.header: -------------------------------------------------------------------------------- 1 | cellrangeratac_count_vars=[ 2 | outdir : MAPPED, 3 | reference : ESSENTIAL_TENX_REFERENCE, 4 | cores : "8", 5 | localmem : "64", 6 | extra : "" // extra params to send to the tool 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangeratac_count.groovy" 10 | 11 | // expected parameter types 12 | class cellrangeratac_count_vars_schema { 13 | String outdir 14 | String reference 15 | String cores 16 | String localmem 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir && reference 22 | } 23 | } 24 | 25 | validate_schema(cellrangeratac_count_vars_schema, cellrangeratac_count_vars) 26 | -------------------------------------------------------------------------------- /modules/scRNAseq/demux_gt.header: -------------------------------------------------------------------------------- 1 | demux_gt_vars=[ 2 | outdir : RESULTS + "/demux_gt", 3 | targets : "targets.txt", //targets file 4 | ref : ESSENTIAL_TENX_TRANSCRIPTOME + "/fasta/genome.fa", // reference genome 5 | cellranger_output : MAPPED, // result files from cellranger to demultiplex 6 | threads : "30", 7 | extra : "" // extra parameter to send to Cite-Seq-Count 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/demux_gt.groovy" 11 | 12 | // expected parameter types 13 | class demux_gt_vars_schema { 14 | String outdir 15 | String targets 16 | String ref 17 | String cellranger_output 18 | String threads 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir && targets && ref && cellranger_output 24 | } 25 | } 26 | 27 | validate_schema(demux_gt_vars_schema, demux_gt_vars) 28 | 29 | -------------------------------------------------------------------------------- /modules/scRNAseq/diffPeaks.header: -------------------------------------------------------------------------------- 1 | diffPeaks_vars=[ 2 | outdir : RESULTS + "/DNAaccess", 3 | project : PROJECT, 4 | res : RESULTS, 5 | assay : "ATAC", 6 | minCells : "20", // minimum number of cells to include a cluster in the analysis 7 | clusterVar : "clusters_wnn", // variable name with clustering info for comparison by group and cluster ("clusters_atac" or "clusters_wnn") 8 | CTannoSelected : ESSENTIAL_CELLTYPE_ANNO[0], // select celltype annotation column for comparison by group and celltype 9 | test : "LR", // statistical test to use. One of "wilcox", "bimod", "roc", "t", "negbinom", "poisson", "LR", "MAST" and "DESeq2". 10 | latentVars : "nFeature_ATAC", // Variables to test, used only when test is one of 'LR', 'negbinom', 'poisson' or 'MAST'.
11 | extra : "" //extra params to send to the tool 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/scRNAseq/diffPeaks.groovy" 15 | 16 | // expected parameter types 17 | class diffPeaks_vars_schema { 18 | String outdir 19 | String project 20 | String res 21 | String assay 22 | String minCells 23 | String clusterVar 24 | String CTannoSelected 25 | String test 26 | String latentVars 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | outdir && project && res && CTannoSelected 32 | } 33 | } 34 | 35 | validate_schema(diffPeaks_vars_schema, diffPeaks_vars) 36 | 37 | -------------------------------------------------------------------------------- /modules/scRNAseq/motifActivity.header: -------------------------------------------------------------------------------- 1 | motifActivity_vars=[ 2 | outdir : RESULTS + "/motifActivity", 3 | project : PROJECT, 4 | res : RESULTS, 5 | db : ESSENTIAL_DB, 6 | clusterVar : "clusters_wnn", // variable name with clustering info for comparison by group and cluster ("clusters_rna" or "clusters_wnn") 7 | CTannoSelected : ESSENTIAL_CELLTYPE_ANNO[0], // select celltype annotation column for comparison by group and celltype 8 | motif2plot : "MA0497.1", // optional motifs for activity plots. Give either a single motif name or a path to a txt file with multiple motifs one per line. 9 | extra : "" //extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/scRNAseq/motifActivity.groovy" 13 | 14 | // expected parameter types 15 | class motifActivity_vars_schema { 16 | String outdir 17 | String project 18 | String res 19 | String db 20 | String clusterVar 21 | String CTannoSelected 22 | String motif2plot 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && project && res && CTannoSelected 28 | } 29 | } 30 | 31 | validate_schema(motifActivity_vars_schema, motifActivity_vars) 32 | -------------------------------------------------------------------------------- /modules/scRNAseq/motifEnrich.header: -------------------------------------------------------------------------------- 1 | motifEnrich_vars=[ 2 | outdir : RESULTS + "/motifEnrichment", 3 | project : PROJECT, 4 | res : RESULTS, 5 | db : ESSENTIAL_DB, 6 | diffPeaks_dir : (binding.variables.containsKey("diffPeaks_vars") ? diffPeaks_vars.outdir : ""), 7 | pval_thresh : "0.05", // p-value threshold of differentially accessible peaks to include 8 | min_peaks : "5", // skip enrichment if fewer peaks are available in the comparison 9 | extra : "" //extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/scRNAseq/motifEnrich.groovy" 13 | 14 | // expected parameter types 15 | class motifEnrich_vars_schema { 16 | String outdir 17 | String project 18 | String res 19 | String db 20 | String diffPeaks_dir 21 | String pval_thresh 22 | String min_peaks 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && project && res 28 | } 29 | } 30 | 31 | validate_schema(motifEnrich_vars_schema, motifEnrich_vars) 32 | -------------------------------------------------------------------------------- /modules/scRNAseq/peaks2genes.header: -------------------------------------------------------------------------------- 1 | peaks2genes_vars=[ 2 | outdir : RESULTS + "/peaks2genes", 3 | project : PROJECT, 4 | res : RESULTS, 5 | db : ESSENTIAL_DB, 6 | genes2use : "'c(\"HMCN1\", \"EVL\", \"EML1\")'", // Genes to test.
If empty, use all genes from the expression assay. 7 | genes2plot : "HMCN1", // Genes defining the region to show in the coverage plot 8 | groupCellsInPlot : "clusters_wnn", // name of metadata columns to group (color) the cells by in the Coverage plot (e.g. "clusters_wnn") 9 | plotUpstream : "100000", // Number of bases to extend the plotting region upstream. 10 | plotDownstream : "100000", // Number of bases to extend the plotting region downstream. 11 | extra : "" //extra params to send to the tool 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/scRNAseq/peaks2genes.groovy" 15 | 16 | // expected parameter types 17 | class peaks2genes_vars_schema { 18 | String outdir 19 | String project 20 | String res 21 | String db 22 | String genes2use 23 | String genes2plot 24 | String groupCellsInPlot 25 | String plotUpstream 26 | String plotDownstream 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | outdir && project && res 32 | } 33 | } 34 | 35 | validate_schema(peaks2genes_vars_schema, peaks2genes_vars) 36 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_filter.header: -------------------------------------------------------------------------------- 1 | sc_filter_vars=[ 2 | outdir : QC + "/sc_qc", 3 | project : PROJECT, 4 | res : RESULTS, 5 | nCount_ATAC_min : "1000", 6 | nCount_ATAC_max : "50000", 7 | nCount_RNA_min : "1000", 8 | nCount_RNA_max : "30000", 9 | FRiPmin : "0.3", // Fraction of reads in peaks (min threshold) 10 | FRiBLmax : "0.05", // Fraction of reads in blacklisted regions (max threshold) 11 | nucleosome_sig_max : "2", // nucleosome signal (max threshold) 12 | TSS_enrich_min : "1", // TSS enrichment (min threshold) 13 | MT_perc_max : "20", // mitochondrial RNA count percentage (max threshold) 14 | extra : "" // extra params to send to the tool 15 | ] 16 | 17 | load PIPELINE_ROOT + "/modules/scRNAseq/sc_filter.groovy" 18 | 19 | // expected parameter types 20 | class sc_filter_vars_schema { 21 | String outdir 22 | String project 23 | String res 24 | String nCount_ATAC_min 25 | String nCount_ATAC_max 26 | String nCount_RNA_min 27 | String nCount_RNA_max 28 | String FRiPmin 29 | String FRiBLmax 30 | String nucleosome_sig_max 31 | String TSS_enrich_min 32 | String MT_perc_max 33 | String extra 34 | 35 | // check for the presence of mandatory params 36 | boolean asBoolean() { 37 | outdir && project && res 38 | } 39 | } 40 | 41 | validate_schema(sc_filter_vars_schema, sc_filter_vars) 42 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_integrateATAC.header: -------------------------------------------------------------------------------- 1 | sc_integrateATAC_vars=[ 2 | outdir : RESULTS + "/sc_integrateATAC", 3 | project : PROJECT, 4 | res : RESULTS, 5 | featureCutoff : (binding.variables.containsKey("DNAaccess_vars") ? DNAaccess_vars.featureCutoff : "q5"), // either percentile specified as 'q' followed by the minimum percentile or minimum number of cells containing the feature 6 | skipFirstLSIcomp: (binding.variables.containsKey("DNAaccess_vars") ? DNAaccess_vars.skipFirstLSIcomp : "1"), // If we see a very strong correlation between the first LSI component and read depth, this component should be removed. 7 | extra : "" //extra params to send to the tool 8 | ] 9 | // please keep in mind that the results of batch-correction or integration are supposed to be used only to define the clusters.
ALL downstream steps like DE analysis should use the RNA or SCT slots of the seurat object 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/sc_integrateATAC.groovy" 12 | 13 | // expected parameter types 14 | class sc_integrateATAC_vars_schema { 15 | String outdir 16 | String project 17 | String res 18 | String featureCutoff 19 | String skipFirstLSIcomp 20 | String extra 21 | 22 | // check for the presence of mandatory params 23 | boolean asBoolean() { 24 | outdir && project && res 25 | } 26 | } 27 | 28 | validate_schema(sc_integrateATAC_vars_schema, sc_integrateATAC_vars) 29 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_integrateRNA.header: -------------------------------------------------------------------------------- 1 | sc_integrateRNA_vars=[ 2 | outdir : RESULTS + "/sc_integrateRNA", 3 | project : PROJECT, 4 | res : RESULTS, 5 | batch : "group", // Provide the column name from the targets file. If more than one batch needs to be addressed, please create a concatenated name of both and provide that as a column in the targets file and here. 6 | n_features : "3000", // Number of variable features to identify for integration (or batch-correcting) 7 | rdtype : "cca", // Dimensional reduction to perform when finding anchors. Can be one of: cca, rpca, rlsi 8 | extra : "" //extra params to send to the tool 9 | ] 10 | // please keep in mind that the results of batch-correction or integration are supposed to be used only to define the clusters. ALL downstream steps like DE analysis should use the RNA or SCT slots of the seurat object 11 | 12 | load PIPELINE_ROOT + "/modules/scRNAseq/sc_integrateRNA.groovy" 13 | 14 | // expected parameter types 15 | class sc_integrateRNA_vars_schema { 16 | String outdir 17 | String project 18 | String res 19 | String batch 20 | String n_features 21 | String rdtype 22 | String extra 23 | 24 | // check for the presence of mandatory params 25 | boolean asBoolean() { 26 | outdir && project && res && batch 27 | } 28 | } 29 | 30 | validate_schema(sc_integrateRNA_vars_schema, sc_integrateRNA_vars) 31 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_qc.groovy: -------------------------------------------------------------------------------- 1 | sc_qc = { 2 | doc title: "sc_qc", 3 | desc: "Quality control for single cell multiome experiment", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Frank Rühle" 7 | 8 | output.dir = sc_qc_vars.outdir 9 | 10 | def sc_qc_FLAGS = 11 | (sc_qc_vars.outdir ? " outdir=" + sc_qc_vars.outdir : "") + 12 | (sc_qc_vars.project ? " project=" + sc_qc_vars.project : "") + 13 | (sc_qc_vars.res ? " res=" + sc_qc_vars.res : "") + 14 | (sc_qc_vars.db ? " db=" + sc_qc_vars.db : "") + 15 | (sc_qc_vars.extra ? " " + sc_qc_vars.extra : "") 16 | 17 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 18 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 19 | 20 | // The sc_qc module does not use any of its inputs directly, but needs to check 21 | // their time stamps to know whether sc_qc should run (in case of pre-existing 22 | // results). This can be done by outputting/echo'ing all inputs.
In order not to 23 | // confuse the pipeline user, this output is written to /dev/null 24 | // --- THE echo COMMAND BELOW MUST NOT BE REMOVED --- 25 | 26 | // run the chunk 27 | produce("sc_qc.RData") { 28 | exec """ 29 | ${TOOL_ENV} && 30 | ${PREAMBLE} && 31 | echo $inputs > /dev/null && 32 | 33 | Rscript ${PIPELINE_ROOT}/tools/sc_qc/sc_qc_multiome.R $sc_qc_FLAGS 34 | ""","sc_qc" 35 | } 36 | } 37 | 38 | -------------------------------------------------------------------------------- /modules/scRNAseq/sc_qc.header: -------------------------------------------------------------------------------- 1 | sc_qc_vars=[ 2 | outdir : QC + "/sc_qc", 3 | project : PROJECT, 4 | res : RESULTS, //where the results lie 5 | db : ESSENTIAL_DB, 6 | extra : "" //extra params to send to the tool 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/scRNAseq/sc_qc.groovy" 10 | 11 | // expected parameter types 12 | class sc_qc_vars_schema { 13 | String outdir 14 | String project 15 | String res 16 | String db 17 | String extra 18 | 19 | // check for the presence of mandatory params 20 | boolean asBoolean() { 21 | outdir && project && res 22 | } 23 | } 24 | 25 | validate_schema(sc_qc_vars_schema, sc_qc_vars) 26 | -------------------------------------------------------------------------------- /modules/scRNAseq/splitpipe_all.header: -------------------------------------------------------------------------------- 1 | splitpipe_all_vars=[ 2 | outdir : MAPPED, 3 | logdir : LOGS + "/splitpipe_all", 4 | targets : "targets.txt", // targets file describing the samples 5 | genome : ESSENTIAL_TENX_REFERENCE, 6 | chemistry : "v2", 7 | expect_cells : ESSENTIAL_TENX_EXPECTED_CELLS, 8 | threads : "8", 9 | extra : " " // extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/scRNAseq/splitpipe_all.groovy" 13 | 14 | // expected parameter types 15 | class splitpipe_all_vars_schema { 16 | String outdir 17 | String logdir 18 | String targets 19 | String genome 20 | String chemistry 21 | Integer expect_cells 22 | String threads 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && genome 28 | } 29 | } 30 | 31 | validate_schema(splitpipe_all_vars_schema, splitpipe_all_vars) 32 | -------------------------------------------------------------------------------- /modules/scRNAseq/splitpipe_comb.groovy: -------------------------------------------------------------------------------- 1 | splitpipe_comb = { 2 | doc title: "split-pipe comb", 3 | desc: "Combining multiple samples with split-pipe comb", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.8", 6 | author: "Frank Rühle" 7 | 8 | output.dir = splitpipe_comb_vars.outdir + "/" 9 | def indir = splitpipe_comb_vars.indir + "/" 10 | 11 | def TOOL_ENV = prepare_tool_env("split_pipe", tools["split_pipe"]["version"], tools["split_pipe"]["runenv"]) 12 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 13 | 14 | produce("splitpipe_comb.done") { 15 | exec """ 16 | ${TOOL_ENV} && 17 | ${PREAMBLE} && 18 | 19 | split-pipe --mode comb --output_dir $output.dir --sublibraries \$(dirname $inputs.bam) && 20 | 21 | touch $output 22 | 23 | ""","splitpipe_comb" 24 | } 25 | } 26 | 27 | 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/splitpipe_comb.header: -------------------------------------------------------------------------------- 1 | splitpipe_comb_vars=[ 2 | outdir : RESULTS + "/aggr/", 3 | indir :
MAPPED, 4 | extra : "" // extra params to send to the tool 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/scRNAseq/splitpipe_comb.groovy" 8 | 9 | // expected parameter types 10 | class splitpipe_comb_vars_schema { 11 | String outdir 12 | String indir 13 | String extra 14 | 15 | // check for the presence of mandatory params 16 | boolean asBoolean() { 17 | outdir && indir 18 | } 19 | } 20 | 21 | validate_schema(splitpipe_comb_vars_schema, splitpipe_comb_vars) 22 | -------------------------------------------------------------------------------- /modules/scRNAseq/subread.header: -------------------------------------------------------------------------------- 1 | subread_count_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | stranded: ESSENTIAL_STRANDED, //whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | paired : (ESSENTIAL_PAIRED == "yes"), //paired end design 5 | genesgtf: ESSENTIAL_GENESGTF, 6 | threads : ESSENTIAL_THREADS, 7 | extra : "" // extra params to send to the tool 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/scRNAseq/subread.groovy" 11 | 12 | // expected parameter types 13 | class subread_count_vars_schema { 14 | String outdir 15 | String stranded 16 | Boolean paired 17 | String genesgtf 18 | Integer threads 19 | String extra 20 | 21 | // check for the presence of mandatory params 22 | boolean asBoolean() { 23 | outdir 24 | } 25 | } 26 | 27 | validate_schema(subread_count_vars_schema, subread_count_vars) 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/umicount.groovy: -------------------------------------------------------------------------------- 1 | umicount = { 2 | doc title: "Deduplication and Counting reads per gene", 3 | desc: "Deduplication and counting of mapped data and splitting according to cell barcode with umi_tools", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = umicount_vars.outdir 9 | 10 | // create the log folder if it doesn't exist 11 | def umicount_LOGDIR = new File(umicount_vars.logdir) 12 | if (!umicount_LOGDIR.exists()) { 13 | umicount_LOGDIR.mkdirs() 14 | } 15 | 16 | def umicount_FLAGS = 17 | (umicount_vars.verbose ? "--verbose=1 " : "") + 18 | (umicount_vars.paired ? "--paired " : "") + 19 | (umicount_vars.param ? " " + umicount_vars.param : "") + 20 | (umicount_vars.extra ? " " + umicount_vars.extra : "") 21 | 22 | def TOOL_ENV = prepare_tool_env("umitools", tools["umitools"]["version"], tools["umitools"]["runenv"]) 23 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 24 | 25 | // run the chunk 26 | transform(".bam") to (".umicount.tsv.gz") { 27 | def SAMPLENAME = input.prefix 28 | exec """ 29 | ${TOOL_ENV} && 30 | ${PREAMBLE} && 31 | 32 | SAMPLENAME_BASE=\$(basename ${SAMPLENAME}) && 33 | umi_tools count $umicount_FLAGS -I $input -S $output -L ${umicount_LOGDIR}/\${SAMPLENAME_BASE}.umicount.log -E ${umicount_LOGDIR}/\${SAMPLENAME_BASE}.umicount.error 34 | ""","umicount" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /modules/scRNAseq/umicount.header: -------------------------------------------------------------------------------- 1 | umicount_vars=[ 2 | outdir: RESULTS + "/umicount", 3 | logdir: LOGS + "/umicount", 4 | log : true, 5 | paired: (ESSENTIAL_PAIRED == "yes"), 6 | //This assumes that the labeling is done on the bam file e.g.
by processing with featureCounts beforehand 7 | //Additionally this is configured to fit marsseq parameters. It might be 8 | //necessary to add --read-length if you want to ensure that not only the position 9 | //+ UMI is used to deduplicate but also the read-length. For marsseq this option 10 | //is not set because we expect reads with the same umi+starting position to be 11 | //PCR duplicates even if they are of different length 12 | param : "--per-gene --gene-tag=XT --assigned-status-tag=XS --per-cell --wide-format-cell-counts", 13 | extra : "--edit-distance-threshold=0 " //Spliced reads are treated differently from unspliced 14 | ] 15 | 16 | load PIPELINE_ROOT + "/modules/scRNAseq/umicount.groovy" 17 | 18 | // expected parameter types 19 | class umicount_vars_schema { 20 | String outdir 21 | String logdir 22 | Boolean log 23 | Boolean paired 24 | String param 25 | String extra 26 | 27 | // check for the presence of mandatory params 28 | boolean asBoolean() { 29 | outdir && logdir 30 | } 31 | } 32 | 33 | validate_schema(umicount_vars_schema, umicount_vars) 34 | 35 | -------------------------------------------------------------------------------- /modules/scRNAseq/umidedup.groovy: -------------------------------------------------------------------------------- 1 | umidedup = { 2 | doc title: "deduplication based on UMIs", 3 | desc: "Deduplication of mapped data using UMIs with umi_tools", 4 | constraints: "", 5 | bpipe_version: "tested with bpipe 0.9.9.3", 6 | author: "Nastasja Kreim" 7 | 8 | output.dir = umidedup_vars.outdir 9 | def umidedup_FLAGS = 10 | (umidedup_vars.verbose ? "--verbose=1 " : "") + 11 | (umidedup_vars.paired ? "--paired " : "") + 12 | (umidedup_vars.param ? " " + umidedup_vars.param : "") + 13 | (umidedup_vars.extra ? " " + umidedup_vars.extra : "") 14 | 15 | def TOOL_ENV = prepare_tool_env("umitools", tools["umitools"]["version"], tools["umitools"]["runenv"]) 16 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 17 | 18 | // run the chunk 19 | transform(".bam") to (".umidedup.bam") { 20 | exec """ 21 | ${TOOL_ENV} && 22 | ${PREAMBLE} && 23 | 24 | umi_tools dedup $umidedup_FLAGS -I $input -S $output --output-stats=${output.prefix}.stats 25 | ""","umidedup" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /modules/scRNAseq/umidedup.header: -------------------------------------------------------------------------------- 1 | umidedup_vars=[ 2 | outdir: RESULTS+ "/umidedup", 3 | log : "--verbose=1", 4 | //this assumes that the labeling is done on the bam file e.g. by processing with featureCounts beforehand 5 | //additionally this is configured to fit marsseq parameters. It might be 6 | //necessary to add --read-length if you want to ensure that not only the position 7 | //+ UMI is used to deduplicate but also the read-length. For marsseq this option
For marsseq this opition 8 | //is not set because we expect reads with the same umi+starting position to be 9 | //PCR duplicates event if they are of different length 10 | param: "--per-cell", 11 | extra: " --spliced-is-unique --edit-distance-threshold=0 " //Spliced reads are treated different from unspliced 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/scRNAseq/umidedup.groovy" 15 | 16 | // expected parameter types 17 | class umidedup_vars_schema { 18 | String outdir 19 | String log 20 | String param 21 | String extra 22 | 23 | // check for the presence of mandatory params 24 | boolean asBoolean() { 25 | outdir 26 | } 27 | } 28 | 29 | validate_schema(umidedup_vars_schema, umidedup_vars) 30 | 31 | -------------------------------------------------------------------------------- /modules/scRNAseq/wnn.header: -------------------------------------------------------------------------------- 1 | wnn_vars=[ 2 | outdir : RESULTS + "/wnn", 3 | project : PROJECT, 4 | res : RESULTS, 5 | knn : "20", // the number of multimodal neighbors to compute. 20 by default 6 | knnRange : "200", // The number of approximate neighbors to compute. 200 by default 7 | clusterAlg : "1", // Algorithm for modularity optimization (1 = Louvain; 2 = Louvain with multilevel refinement; 3 = SLM; 4 = Leiden). 8 | clusterRes : "0.8", // resolution parameter above (below) 1.0 for larger (smaller) number of communities (default 0.8). 9 | skipFirstLSIcomp : DNAaccess_vars.skipFirstLSIcomp, // skip first LSI component(s) if selected in DNAaccess.header 10 | batchCorrection : RUN_BATCHCORRECT, // We use this to determine in the WNN step which dimensionality reductions to use. 11 | extra : "" //extra parms to sent to the tool 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/scRNAseq/wnn.groovy" 15 | 16 | // expected parameter types 17 | class wnn_vars_schema { 18 | String outdir 19 | String project 20 | String res 21 | String knn 22 | String knnRange 23 | String clusterAlg 24 | String clusterRes 25 | String skipFirstLSIcomp 26 | Boolean batchCorrection 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | outdir && project && res 32 | } 33 | } 34 | 35 | validate_schema(wnn_vars_schema, wnn_vars) 36 | -------------------------------------------------------------------------------- /modules/smallRNAseq/bowtie1.header: -------------------------------------------------------------------------------- 1 | bowtie1_sRNA_vars=[ 2 | mapped : MAPPED, // output dir 3 | logdir : LOGS_MY + "/mapping", // mapping logs 4 | threads : Integer.toString(ESSENTIAL_THREADS), // threads to use 5 | samtools_threads: Integer.toString(ESSENTIAL_THREADS), 6 | ref : ESSENTIAL_BOWTIE_REF, // prefix of the bowtie reference genome 7 | mm : "1", // number of mismatches allowed 8 | multireport : "1", // if a read has more than reportable alignments, one is reported at random. 9 | best : true, // bowtie best mode (implies --best --strata --tryhard). Doesn't apply to PE 10 | quals : "--phred33-quals", // phred33-quals. 
Use --phred64-quals for old sequencing runs 11 | extra : "" 12 | ] 13 | 14 | load PIPELINE_ROOT + "/modules/smallRNAseq/bowtie1.groovy" 15 | 16 | // expected parameter types 17 | class bowtie1_vars_schema { 18 | String mapped 19 | String logdir 20 | String threads 21 | String samtools_threads 22 | String ref 23 | String mm 24 | String multireport 25 | Boolean best 26 | String quals 27 | String extra 28 | 29 | // check for the presence of mandatory params 30 | boolean asBoolean() { 31 | mapped && ref 32 | } 33 | } 34 | 35 | validate_schema(bowtie1_vars_schema, bowtie1_sRNA_vars) 36 | -------------------------------------------------------------------------------- /modules/smallRNAseq/dedup.header: -------------------------------------------------------------------------------- 1 | FilterDuplicates_vars=[ 2 | outdir: TRIMMED, 3 | logdir: LOGS_MY + "/removeDup" 4 | ] 5 | 6 | load PIPELINE_ROOT + "/modules/smallRNAseq/dedup.groovy" 7 | 8 | -------------------------------------------------------------------------------- /modules/smallRNAseq/fastq_quality_filter.groovy: -------------------------------------------------------------------------------- 1 | FastQQualityFilter = { 2 | doc title: "Remove low-quality sequences", 3 | desc: "filter out reads containing low-quality bases (Phred score below 20) in order to facilitate PCR duplicate removal.", 4 | constraints: "Only supports compressed FASTQ files", 5 | author: "Antonio Domingues, Anke Busch" 6 | 7 | output.dir = FastQQualityFilter_vars.outdir 8 | 9 | // create the log folder if it doesn't exist 10 | def FASTQ_QUALITY_FILTER_LOGDIR = new File(FastQQualityFilter_vars.logdir) 11 | if (!FASTQ_QUALITY_FILTER_LOGDIR.exists()) { 12 | FASTQ_QUALITY_FILTER_LOGDIR.mkdirs() 13 | } 14 | 15 | def FASTQ_QUALITY_FILTER_FLAGS= 16 | (FastQQualityFilter_vars.min_qual ? " -q " + FastQQualityFilter_vars.min_qual : "") + 17 | (FastQQualityFilter_vars.min_percent ? " -p " + FastQQualityFilter_vars.min_percent : "") + 18 | (FastQQualityFilter_vars.qual_format ? " -Q " + FastQQualityFilter_vars.qual_format : "") + 19 | (FastQQualityFilter_vars.extra ?
" " + FastQQualityFilter_vars.extra : "") 20 | 21 | def TOOL_ENV = prepare_tool_env("fastx", tools["fastx"]["version"], tools["fastx"]["runenv"]) 22 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 23 | 24 | transform(".fastq.gz") to (".highQ.fastq.gz") { 25 | def SAMPLENAME = input.prefix.prefix 26 | exec """ 27 | ${TOOL_ENV} && 28 | ${PREAMBLE} && 29 | 30 | SAMPLENAME_BASE=\$(basename ${SAMPLENAME}) && 31 | zcat $input | fastq_quality_filter $FASTQ_QUALITY_FILTER_FLAGS -o $output 2>&1 >> ${FastQQualityFilter_vars.logdir}/\${SAMPLENAME_BASE}.fastq_quality_filter.log 32 | ""","FastQQualityFilter" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /modules/smallRNAseq/fastq_quality_filter.header: -------------------------------------------------------------------------------- 1 | FastQQualityFilter_vars=[ 2 | outdir : TRIMMED, 3 | logdir : LOGS_MY + "/filterQuality", 4 | min_qual : ESSENTIAL_MINIMAL_QUAL, // minimal quality of bases in reads to be kept 5 | min_percent: "100", // percentage of bases fulfilling the minimal quality requirement 6 | qual_format: "33", // format of the quality scores 7 | extra : "-v -z" 8 | ] 9 | 10 | load PIPELINE_ROOT + "/modules/smallRNAseq/fastq_quality_filter.groovy" 11 | 12 | -------------------------------------------------------------------------------- /modules/smallRNAseq/filter2htseq.groovy: -------------------------------------------------------------------------------- 1 | filter2htseq = { 2 | doc title: "filter2htseq", 3 | desc: "filter featureCount output to fit HTSeq format, extract column 1 and 7 as well as skipping the header", 4 | constraints: "none.", 5 | author: "Oliver Drechsel, Antonio Domingues, Anke Busch" 6 | 7 | var subdir : "" 8 | output.dir = filter2htseq_vars.outdir + "/$subdir" 9 | 10 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 11 | 12 | transform(".raw_readcounts.tsv") to (".readcounts.tsv") { 13 | exec """ 14 | ${PREAMBLE} && 15 | 16 | tail -n +3 $input | awk '{print \$1\"\\t\"\$7}' > $output 17 | ""","filter2htseq" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /modules/smallRNAseq/filter2htseq.header: -------------------------------------------------------------------------------- 1 | filter2htseq_vars=[ 2 | outdir: RESULTS + "/subread-count" 3 | ] 4 | 5 | load PIPELINE_ROOT + "/modules/smallRNAseq/filter2htseq.groovy" 6 | 7 | -------------------------------------------------------------------------------- /modules/smallRNAseq/filter_smallrna_counts.groovy: -------------------------------------------------------------------------------- 1 | filter_smallRNA_counts = { 2 | doc title: "filter_smallRNA_counts", 3 | desc: "Extract count of a selected type of smallRNAs to separate count table files", 4 | constraints: "based on subread (featurecounts) run", 5 | author: "Anke Busch" 6 | 7 | var subdir : "" 8 | output.dir = filter_smallRNA_counts_vars.outdir + "/$subdir" 9 | 10 | def TOOL_ENV = prepare_tool_env("R", tools["R"]["version"], tools["R"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".readcounts.tsv") to ("." 
+ filter_smallRNA_counts_vars.smallrna + ".readcounts.tsv") { 14 | 15 | exec """ 16 | 17 | ${TOOL_ENV} && 18 | ${PREAMBLE} && 19 | 20 | Rscript ${PIPELINE_ROOT}/tools/smallRNA_BCF/extract_smallRNA.R gtf=$filter_smallRNA_counts_vars.genesgtf input=$input outdir=$output.dir type=$filter_smallRNA_counts_vars.type smallrna=$filter_smallRNA_counts_vars.smallrna 21 | 22 | ""","filter_smallRNA_counts" 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /modules/smallRNAseq/filter_smallrna_counts.header: -------------------------------------------------------------------------------- 1 | filter_smallRNA_counts_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | genesgtf : ESSENTIAL_GENESGTF, 4 | type : ESSENTIAL_FEATURETYPE, // describes type of RNA in gtf, gencode uses gene_type, ensembl uses gene_biotype 5 | smallrna : ESSENTIAL_SMALLRNA // type of smallRNA to be analyzed 6 | ] 7 | 8 | load PIPELINE_ROOT + "/modules/smallRNAseq/filter_smallrna_counts.groovy" 9 | 10 | 11 | -------------------------------------------------------------------------------- /modules/smallRNAseq/mirDeep2.groovy: -------------------------------------------------------------------------------- 1 | miRDeep2 = { 2 | doc title: "miRDeep2", 3 | desc: """Quantification of miRNAs performed in 2 steps: (1) process reads and map them to the reference genome; (2) quantify miRNA expression. This is step 2.""", 4 | constraints: "Requires mirDeep2.", 5 | author: "Antonio Domingues, Anke Busch" 6 | 7 | def EXP = input1.split("/")[-1].replaceAll(".arf", "") 8 | output.dir = miRDeep2_vars.outdir + "/" + EXP 9 | 10 | def TOOL_ENV = prepare_tool_env("mirdeep2", tools["mirdeep2"]["version"], tools["mirdeep2"]["runenv"]) 11 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 12 | 13 | transform(".arf", ".fa") to (".tmp") { 14 | exec """ 15 | ${TOOL_ENV} && 16 | ${PREAMBLE} && 17 | 18 | reads_fa=`realpath $input2`; 19 | genome_fa=`realpath $miRDeep2_vars.genome_seq`; 20 | reads_vs_genome_arf=`realpath $input1`; 21 | mature_ref_miRNAs_fa=`realpath $miRDeep2_vars.mature_mirna`; 22 | mature_other_miRNAs_fa="none"; 23 | hairpin_ref_miRNAs=`realpath $miRDeep2_vars.hairpin_mirna`; 24 | 25 | mkdir -p $output.dir && 26 | cd $output.dir && 27 | 28 | miRDeep2.pl \$reads_fa \$genome_fa \$reads_vs_genome_arf \$mature_ref_miRNAs_fa \$mature_other_miRNAs_fa \$hairpin_ref_miRNAs -t $miRDeep2_vars.species -c -d -v -r ${EXP} -z ".${EXP}" 2> ${output.dir}/${EXP}.report.log && 29 | touch \$(basename $output) 30 | ""","miRDeep2" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /modules/smallRNAseq/mirDeep2.header: -------------------------------------------------------------------------------- 1 | miRDeep2_vars=[ 2 | outdir : RESULTS + "/miRDeep2", 3 | genome_seq : ESSENTIAL_GENOME_REF, 4 | mature_mirna : ESSENTIAL_MATURE_MIRNA, 5 | hairpin_mirna: ESSENTIAL_HAIRPIN_MIRNA, 6 | species : ESSENTIAL_SPECIES 7 | ] 8 | 9 | load PIPELINE_ROOT + "/modules/smallRNAseq/mirDeep2.groovy" 10 | 11 | -------------------------------------------------------------------------------- /modules/smallRNAseq/mirDeep2_mapper.groovy: -------------------------------------------------------------------------------- 1 | miRDeep2Mapper = { 2 | doc title: "miRDeep2", 3 | desc: "Quantification of miRNAs performed in 2 steps: (1) process reads and map them to the reference genome; (2) quantify miRNA expression. This is step 1.", 4
| constraints: "Requires mirDeep2.", 5 | author: "Antonio Domingues, Anke Busch" 6 | 7 | output.dir = miRDeep2Mapper_vars.outdir 8 | 9 | def MIRDEEP2MAPPER_FLAGS= 10 | (miRDeep2Mapper_vars.genome_ref ? " -p " + miRDeep2Mapper_vars.genome_ref : "") + 11 | (miRDeep2Mapper_vars.extra ? " " + miRDeep2Mapper_vars.extra : "") 12 | 13 | def TOOL_ENV = prepare_tool_env("mirdeep2", tools["mirdeep2"]["version"], tools["mirdeep2"]["runenv"]) 14 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 15 | 16 | transform(".fastq.gz") to (".arf", ".fa") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | x="\${TMP}/\$(basename $input.prefix)" && 22 | gzip -cd $input > \$x && 23 | mapper.pl \$x $MIRDEEP2MAPPER_FLAGS -s $output2 -t $output1 &> ${output2.prefix}.mapper.log && 24 | rm \$x 25 | ""","miRDeep2Mapper" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /modules/smallRNAseq/mirDeep2_mapper.header: -------------------------------------------------------------------------------- 1 | miRDeep2Mapper_vars=[ 2 | outdir : RESULTS + "/miRDeep2", 3 | genome_ref: ESSENTIAL_BOWTIE_REF, 4 | extra : "-e " + // input file is in fastq format (see the mapper.pl docs) 5 | "-h " + // parse to fasta format 6 | "-m " + // collapse identical reads 7 | "-i " + // convert RNA to DNA alphabet (to map against the genome) 8 | "-j " + // remove entries with non-canonical letters 9 | "-o 8" // number of threads to use for bowtie 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/smallRNAseq/mirDeep2_mapper.groovy" 13 | 14 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread.groovy: -------------------------------------------------------------------------------- 1 | subread_count = { 2 | doc title: "subread_count", 3 | desc: "Counting reads in features with featureCounts from the subread package", 4 | constraints: "Default: strand specific counting.", 5 | author: "Oliver Drechsel, Antonio Domingues, Anke Busch" 6 | 7 | var subdir : "" 8 | output.dir = subread_count_vars.outdir + "/$subdir" 9 | 10 | def SUBREAD_FLAGS = 11 | "--donotsort " + 12 | (subread_count_vars.threads ? " -T " + subread_count_vars.threads : "") + 13 | (subread_count_vars.genesgtf ? " -a " + subread_count_vars.genesgtf : "") + 14 | (subread_count_vars.count_multimapping ? " -M " : "") + 15 | (subread_count_vars.count_ambiguous ? " -O " : "") + 16 | (subread_count_vars.feature ? " -t " + subread_count_vars.feature : "") + 17 | (subread_count_vars.attribute? " -g " + subread_count_vars.attribute: "") + 18 | (subread_count_vars.extra ? " " + subread_count_vars.extra : "") + 19 | (subread_count_vars.stranded == "no" ? " -s0 " : (subread_count_vars.stranded == "yes" ? " -s1 " : " -s2 ")) 20 | 21 | def TOOL_ENV = prepare_tool_env("subread", tools["subread"]["version"], tools["subread"]["runenv"]) 22 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 23 | 24 | // run the chunk 25 | transform(".bam") to (".raw_readcounts.tsv") { 26 | exec """ 27 | ${TOOL_ENV} && 28 | ${PREAMBLE} && 29 | 30 | featureCounts $SUBREAD_FLAGS -o $output $input 2> ${output.prefix}_subreadlog.stderr 31 | ""","subread_count" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread.header: -------------------------------------------------------------------------------- 1 | subread_count_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | stranded : ESSENTIAL_STRANDED, // whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | count_multimapping: true, // also count multi-mapping reads 5 | count_ambiguous : true, // also count ambiguous reads 6 | feature : "exon", // annotation feature to count mapped reads ("exon" by default) 7 | attribute : "gene_id", // annotation attribute type used to group features ("gene_id" by default) 8 | genesgtf : ESSENTIAL_GENESGTF, 9 | threads : Integer.toString(ESSENTIAL_THREADS), 10 | extra : " " // extra params to send to the tool 11 | ] 12 | 13 | load PIPELINE_ROOT + "/modules/smallRNAseq/subread.groovy" 14 | 15 | // expected parameter types 16 | class subread_count_vars_schema { 17 | String outdir 18 | String stranded 19 | Boolean count_multimapping 20 | Boolean count_ambiguous 21 | String feature 22 | String attribute 23 | String genesgtf 24 | String threads 25 | String extra 26 | 27 | // check for the presence of mandatory params 28 | boolean asBoolean() { 29 | outdir && genesgtf && stranded 30 | } 31 | } 32 | 33 | validate_schema(subread_count_vars_schema, subread_count_vars) 34 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread2rnatypes.header: -------------------------------------------------------------------------------- 1 | subread2rnatypes_vars=[ 2 | outdir : QC + "/RNAtypes", 3 | stranded : ESSENTIAL_STRANDED, //whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | paired : (ESSENTIAL_PAIRED == "yes"), //paired end design 5 | genesgtf : ESSENTIAL_GENESGTF, 6 | feature : "exon", // type of feature to be counted 7 | accumulate: ESSENTIAL_FEATURETYPE, // type of annotation counts should be accumulated on. Usually that would be gene_id, but in this case we choose gene_biotype 8 | threads : Integer.toString(ESSENTIAL_THREADS), 9 | extra : "-M -O " // extra params to send to the tool (-M also count multi-mapping reads, -O also count ambiguous reads) 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/RNAseq/subread2rnatypes.groovy" 13 | 14 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread_mirnamature.groovy: -------------------------------------------------------------------------------- 1 | subread_miRNAmature_count = { 2 | doc title: "subread_miRNAmature_count", 3 | desc: "Counting reads on mature miRNAs with featureCounts from the subread package", 4 | constraints: "miRNA gff (from miRBase) needs to be available.", 5 | author: "Anke Busch" 6 | 7 | var subdir : "" 8 | output.dir = subread_miRNAmature_count_vars.outdir + "/$subdir" 9 | 10 | def SUBREAD_MIRNAMATURE_FLAGS = 11 | "--donotsort " + 12 | (subread_miRNAmature_count_vars.threads ? " -T " + subread_miRNAmature_count_vars.threads : "") + 13 | (subread_miRNAmature_count_vars.genesgff ? " -a " + subread_miRNAmature_count_vars.genesgff : "") + 14 | (subread_miRNAmature_count_vars.count_multimapping ? " -M " : "") + 15 | (subread_miRNAmature_count_vars.feature ? " -t " + subread_miRNAmature_count_vars.feature : "") + 16 | (subread_miRNAmature_count_vars.attribute? " -g " + subread_miRNAmature_count_vars.attribute: "") + 17 | (subread_miRNAmature_count_vars.extra ? " " + subread_miRNAmature_count_vars.extra : "") + 18 | (subread_miRNAmature_count_vars.stranded == "no" ? " -s0 " : (subread_miRNAmature_count_vars.stranded == "yes" ? " -s1 " : " -s2 ")) 19 | 20 | def TOOL_ENV = prepare_tool_env("subread", tools["subread"]["version"], tools["subread"]["runenv"]) 21 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 22 | 23 | // run the chunk 24 | transform(".bam") to (".miRNAmature.raw_readcounts.tsv") { 25 | exec """ 26 | ${TOOL_ENV} && 27 | ${PREAMBLE} && 28 | 29 | featureCounts $SUBREAD_MIRNAMATURE_FLAGS -o $output $input 2> ${output.prefix}_subreadlog.stderr 30 | ""","subread_miRNAmature_count" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /modules/smallRNAseq/subread_mirnamature.header: -------------------------------------------------------------------------------- 1 | subread_miRNAmature_count_vars=[ 2 | outdir : RESULTS + "/subread-count", 3 | stranded : ESSENTIAL_STRANDED, // whether the data is from a strand-specific assay (illumina SR: always reverse) 4 | count_multimapping: true, // also count multi-mapping reads 5 | feature : "miRNA", // annotation feature to count mapped reads ("exon" by default) 6 | attribute : "Name", // annotation attribute type used to group features ("gene_id" by default) 7 | genesgff : ESSENTIAL_MIRNAGFF, 8 | threads : Integer.toString(ESSENTIAL_THREADS), 9 | extra : " " // extra params to send to the tool 10 | ] 11 | 12 | load PIPELINE_ROOT + "/modules/smallRNAseq/subread_mirnamature.groovy" 13 | 14 | // expected parameter types 15 | class subread_miRNAmature_count_vars_schema { 16 | String outdir 17 | String stranded 18 | Boolean count_multimapping 19 | String feature 20 | String attribute 21 | String genesgff 22 | String threads 23 | String extra 24 | 25 | // check for the presence of mandatory params 26 | boolean asBoolean() { 27 | outdir && genesgff && stranded 28 | } 29 | } 30 | 31 | validate_schema(subread_miRNAmature_count_vars_schema, subread_miRNAmature_count_vars) 32 | -------------------------------------------------------------------------------- /modules/smallRNAseq/trim_umis.groovy: -------------------------------------------------------------------------------- 1 | TrimUMIs = { 2 | doc title: "Trim UMIs", 3 | desc: """Trims the random barcodes (UMIs) flanking the insert in adapter-removed reads (NNNN-insert-NNNN), which help in the identification of PCR duplicates.""", 4 | constraints: "Requires seqtk.", 5 | author: "Antonio Domingues, Anke Busch" 6 | 7 | output.dir = TrimUMIs_vars.outdir 8 | 9 | def TRIMFQ_FLAGS = 10 | (TrimUMIs_vars.left_trim ? " -b " + TrimUMIs_vars.left_trim : "") + 11 | (TrimUMIs_vars.right_trim ?
" -e " + TrimUMIs_vars.right_trim : "") 12 | 13 | def TOOL_ENV = prepare_tool_env("seqtk", tools["seqtk"]["version"], tools["seqtk"]["runenv"]) 14 | def PREAMBLE = get_preamble(stage:stageName, outdir:output.dir, input:new File(input1.prefix).getName()) 15 | 16 | transform(".fastq.gz") to (".trimmed.fastq.gz") { 17 | exec """ 18 | ${TOOL_ENV} && 19 | ${PREAMBLE} && 20 | 21 | seqtk trimfq $TRIMFQ_FLAGS $input | gzip > $output 22 | ""","TrimUMIs" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /modules/smallRNAseq/trim_umis.header: -------------------------------------------------------------------------------- 1 | TrimUMIs_vars=[ 2 | outdir : TRIMMED, 3 | left_trim : ESSENTIAL_UMI_LENGTH_LEFT, 4 | right_trim: ESSENTIAL_UMI_LENGTH_RIGHT 5 | ] 6 | 7 | load PIPELINE_ROOT + "/modules/smallRNAseq/trim_umis.groovy" 8 | 9 | -------------------------------------------------------------------------------- /pipelines/ChIPseq/contrasts_diffbind.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix sub_experiment 2 | mutvswt (mut_Pol2-wt_Pol2) ~group 1 3 | -------------------------------------------------------------------------------- /pipelines/ChIPseq/targets.txt: -------------------------------------------------------------------------------- 1 | IP IPname INPUT INPUTname group Replicate PeakCaller 2 | wt_Pol2_1 wt_Pol2_1 wt_Input_1 wt_Input_1 wt_Pol2 1 macs 3 | wt_Pol2_2 wt_Pol2_2 wt_Input_2 wt_Input_2 wt_Pol2 2 macs 4 | mut_Pol2_1 mut_Pol2_1 mut_Input_1 mut_Input_1 mut_Pol2 1 macs 5 | mut_Pol2_2 mut_Pol2_2 mut_Input_2 mut_Input_2 mut_Pol2 2 macs 6 | -------------------------------------------------------------------------------- /pipelines/ChIPseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 
14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ ] 16 | 17 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 18 | tools.putAll(tools_custom) // override with users custom versions/runenvs 19 | -------------------------------------------------------------------------------- /pipelines/DNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file 2 | sample_1 filename1 3 | sample_2 filename2 4 | 5 | -------------------------------------------------------------------------------- /pipelines/DNAseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ ] 16 | 17 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 18 | tools.putAll(tools_custom) // override with users custom versions/runenvs 19 | -------------------------------------------------------------------------------- /pipelines/RNAseq/contrasts.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix 2 | KO.vs.WT (KO-WT) ~group 3 | -------------------------------------------------------------------------------- /pipelines/RNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file group replicate 2 | mut_1 mut_1_f.readcounts.tsv mut 1 3 | mut_2 mut_2_f.readcounts.tsv mut 2 4 | wt_1 wt_1_f.readcounts.tsv wt 1 5 | wt_2 wt_2_f.readcounts.tsv wt 2 6 | -------------------------------------------------------------------------------- /pipelines/RNAseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ ] 16 | 17 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 18 | tools.putAll(tools_custom) // override with users custom versions/runenvs 19 | -------------------------------------------------------------------------------- /pipelines/RNAseqVariantCalling/README.md: -------------------------------------------------------------------------------- 1 | **Important NOTE: this is a legacy pipeline using GATK3, which is now obsolete. 
There are no plans to upgrade the pipeline to GATK4, and it is unsupported by BCF. Use the pipeline at your own risk.** 2 | 3 | # SNP calling on RNAseq data pipeline 4 | 5 | ### What will it do 6 | 7 | Heavily undocumented, but it will identify SNPs from RNA-seq data using GATK best practices. Features a 2-step mapping approach with STAR. 8 | 9 | To be documented at a later stage. 10 | 11 | ### Prerequisites 12 | 13 | - picard tools (> 1.119) 14 | - GenomeAnalysisTK version 3. **Please note it is obsolete** 15 | -------------------------------------------------------------------------------- /pipelines/RNAseqVariantCalling/essential.vars.groovy: -------------------------------------------------------------------------------- 1 | //Pipeline GATK RNA-seq variant calling 2 | ESSENTIAL_PROJECT="/local/scratch1/imb-kettinggr/adomingues/projects/snps-splicing" 3 | ESSENTIAL_STAR_REF="/fsimb/groups/imb-kettinggr/genomes/Danio_rerio/Ensembl/Zv10/Sequence/StarIndex2_4_1d_modified/" 4 | ESSENTIAL_GENOME_REF="/fsimb/groups/imb-kettinggr/genomes/Danio_rerio/Ensembl/Zv10/Sequence/chr_sequences/chr.clean.fa" 5 | ESSENTIAL_VCF_REF="/home/adomingu/imb-kettinggr/genomes/Danio_rerio/Ensembl/Zv10/Annotation/variation/Danio_rerio.vcf.gz" 6 | ESSENTIAL_READLENGTH=101 7 | ESSENTIAL_THREADS=4 8 | 9 | //global vars that will be reused in some global vars 10 | PROJECT=ESSENTIAL_PROJECT 11 | LOGS=PROJECT + "/logs" 12 | MAPPED=PROJECT + "/mapped" 13 | QC=PROJECT + "/qc" 14 | REPORTS=PROJECT + "/reports" 15 | RESULTS=PROJECT + "/results" 16 | TMP=PROJECT + "/tmp" 17 | TRACKS=MAPPED + "/tracks" 18 | 19 | // optional pipeline stages to include 20 | -------------------------------------------------------------------------------- /pipelines/RNAseqVariantCalling/rnaseq_variant_calling.pipeline.groovy: -------------------------------------------------------------------------------- 1 | PIPELINE="RNAseqVariantCalling" 2 | PIPELINE_VERSION="1.0" 3 | PIPELINE_ROOT="./NGSpipe2go/" // adjust to your project's needs 4 | 5 | load PIPELINE_ROOT + "/pipelines/RNAseqVariantCalling/essential.vars.groovy" 6 | load PIPELINE_ROOT + "/pipelines/RNAseqVariantCalling/tools.groovy" 7 | load PIPELINE_ROOT + "/config/preambles.groovy" 8 | load PIPELINE_ROOT + "/config/bpipe.config.groovy" 9 | load PIPELINE_ROOT + "/config/validate_module_params.groovy" 10 | 11 | load PIPELINE_ROOT + "/modules/NGS/bamindexer.header" 12 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/star1pass.header" 13 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/merge_SJ_tab.header" 14 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/star2pass.header" 15 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/add_read_group.header" 16 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/mark_dups.header" 17 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/splitNcigar.header" 18 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/base_recalibration.header" 19 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/create_star_index_sjdb.header" 20 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/variantCall_HC.header" 21 | load PIPELINE_ROOT + "/modules/RNAseqVariantCalling/variant_filtration.header" 22 | load PIPELINE_ROOT + "/modules/miscellaneous/collect_tool_versions.header" 23 | 24 | //MAIN PIPELINE TASK 25 | dontrun = { println "didn't run $module" } 26 | 27 | Bpipe.run { 28 | "%R*.fastq.gz" * [ STAR_pe ] + 29 | "*.SJ.out.tab" * [ FilterAndMergeSJtab + GenerateStarIndexFromSJ ] + 30 | "%R*.fastq.gz" * [ 31 | STAR_pe_2nd + AddRG +
MarkDups + SplitNCigarReads + BaseRecalibration + VariantCallHC + VariantFiltration 32 | ] + collectToolVersions 33 | } 34 | -------------------------------------------------------------------------------- /pipelines/RNAseqVariantCalling/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ 16 | gatk: [ runenv: "lmod", version: "3.4-46" ] 17 | ] 18 | 19 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 20 | tools.putAll(tools_custom) // override with users custom versions/runenvs 21 | -------------------------------------------------------------------------------- /pipelines/breaktag/breaktag.pipeline.groovy: -------------------------------------------------------------------------------- 1 | PIPELINE="breaktag" 2 | PIPELINE_VERSION="1.0.0" 3 | PIPELINE_ROOT="./NGSpipe2go" 4 | 5 | load PIPELINE_ROOT + "/pipelines/breaktag/essential.vars.groovy" 6 | load PIPELINE_ROOT + "/pipelines/breaktag/tools.groovy" 7 | load PIPELINE_ROOT + "/config/preambles.groovy" 8 | load PIPELINE_ROOT + "/config/bpipe.config.groovy" 9 | load PIPELINE_ROOT + "/config/validate_module_params.groovy" 10 | 11 | load PIPELINE_ROOT + "/modules/breaktag/pattern_filtering.header" 12 | load PIPELINE_ROOT + "/modules/breaktag/bwa.header" 13 | load PIPELINE_ROOT + "/modules/breaktag/count_breaks.header" 14 | load PIPELINE_ROOT + "/modules/breaktag/count_breaks_strandless.header" 15 | load PIPELINE_ROOT + "/modules/breaktag/collect_stats.header" 16 | load PIPELINE_ROOT + "/modules/NGS/fastqc.header" 17 | load PIPELINE_ROOT + "/modules/NGS/multiqc.header" 18 | load PIPELINE_ROOT + "/modules/miscellaneous/collect_tool_versions.header" 19 | 20 | //MAIN PIPELINE TASK 21 | dontrun = { println "didn't run $module" } 22 | collect_bams = { forward inputs.bam } 23 | 24 | Bpipe.run { 25 | (RUN_IN_PAIRED_END_MODE ? "%.R*.fastq.gz" : "%.fastq.gz") * [ 26 | FastQC, 27 | pattern_filtering + 28 | bwa + 29 | count_breaks + 30 | count_breaks_strandless + 31 | collect_stats 32 | ] + 33 | collectToolVersions + 34 | MultiQC 35 | } 36 | -------------------------------------------------------------------------------- /pipelines/breaktag/essential.vars.groovy: -------------------------------------------------------------------------------- 1 | // breaktag ESSENTIAL VARIABLES 2 | 3 | // Define essential variables here. 4 | // Further module-specific variables can be adjusted in the corresponding ".header" files for each module. 
5 | // 6 | 7 | // General parameters 8 | ESSENTIAL_PROJECT="/fsimb/groups/imb-bioinfocf/projects/roukos/imb_roukos_2021_29_longo_breaktag_novogene/ngspipe2go" 9 | ESSENTIAL_SAMPLE_PREFIX="" 10 | ESSENTIAL_THREADS=16 11 | 12 | // Mapping parameters 13 | ESSENTIAL_BWA_REF="/fsimb/common/genomes/homo_sapiens/ucsc/hg38/canonical/index/bwa/hg38.fa" 14 | ESSENTIAL_PAIRED="yes" // paired end design 15 | ESSENTIAL_QUALITY=60 // min mapping quality of reads to be kept. Defaults to 60 16 | 17 | // further optional pipeline stages to include 18 | RUN_IN_PAIRED_END_MODE=(ESSENTIAL_PAIRED == "yes") 19 | 20 | // project folders 21 | PROJECT=ESSENTIAL_PROJECT 22 | LOGS=PROJECT + "/logs" 23 | MAPPED=PROJECT + "/mapped" 24 | QC=PROJECT + "/qc" 25 | RAWDATA=PROJECT + "/rawdata" 26 | REPORTS=PROJECT + "/reports" 27 | RESULTS=PROJECT + "/results" 28 | TMP=PROJECT + "/tmp" 29 | TRACKS=PROJECT + "/tracks" 30 | TARGETS=PROJECT + "/targets.txt" 31 | 32 | -------------------------------------------------------------------------------- /pipelines/breaktag/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ 16 | // fastqc : [ runenv: "lmod", version: "0.11.8" ] 17 | ] 18 | 19 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 20 | tools.putAll(tools_custom) // override with users custom versions/runenvs 21 | -------------------------------------------------------------------------------- /pipelines/scRNAseq/contrasts.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast 2 | Treatment1.vs.Ctrl (Treat1-Ctrl) 3 | Treatment2.vs.Ctrl (Treat2-Ctrl) 4 | -------------------------------------------------------------------------------- /pipelines/scRNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file group replicate 2 | S1 org1_1 organoid1 1 3 | S3 org3_1 organoid3 1 4 | -------------------------------------------------------------------------------- /pipelines/scRNAseq/tenXatac.pipeline.groovy: -------------------------------------------------------------------------------- 1 | PIPELINE="tenXatac" 2 | PIPELINE_VERSION="1.0" 3 | PIPELINE_ROOT="./NGSpipe2go/" // may need adjustment for some projects 4 | 5 | load PIPELINE_ROOT + "/pipelines/scRNAseq/essential.vars.groovy" 6 | load PIPELINE_ROOT + "/pipelines/scRNAseq/tools.groovy" 7 | load PIPELINE_ROOT + "/config/preambles.groovy" 8 | load PIPELINE_ROOT + "/config/bpipe.config.groovy" 9 | load PIPELINE_ROOT + "/config/validate_module_params.groovy" 10 | 11 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangeratac_count.header" 12 | load PIPELINE_ROOT + "/modules/scRNAseq/cellrangeratac_aggr.header" 13 | load PIPELINE_ROOT + "/modules/NGS/bamcoverage.header" 14 | load PIPELINE_ROOT + "/modules/NGS/bamindexer.header" 15 | load PIPELINE_ROOT + 
"/modules/NGS/fastqc.header" 16 | load PIPELINE_ROOT + "/modules/NGS/fastqscreen.header" 17 | load PIPELINE_ROOT + "/modules/NGS/markdups2.header" 18 | load PIPELINE_ROOT + "/modules/NGS/insertsize.header" 19 | load PIPELINE_ROOT + "/modules/NGS/cutadapt.header" 20 | load PIPELINE_ROOT + "/modules/miscellaneous/collect_tool_versions.header" 21 | load PIPELINE_ROOT + "/modules/scRNAseq/shinyreports.header" 22 | load PIPELINE_ROOT + "/modules/NGS/multiqc.header" 23 | 24 | 25 | dontrun = { println "didn't run $module" } 26 | 27 | Bpipe.run { 28 | "%.fastq.gz" * [ FastQC + FastqScreen + 29 | (RUN_CUTADAPT ? Cutadapt + FastQC.using(subdir:"trimmed") : dontrun.using(module:"Cutadapt")) ] + 30 | "%_S*_L*_R*_001.fastq.gz" * [ 31 | cellrangeratac_count + [ 32 | bamCoverage, 33 | InsertSize 34 | ] 35 | ] + 36 | cellrangeratac_aggr + 37 | (RUN_TRACKHUB ? trackhub_config + trackhub : dontrun.using(module:"trackhub")) + 38 | collectToolVersions + MultiQC + shinyReports 39 | } 40 | 41 | -------------------------------------------------------------------------------- /pipelines/scRNAseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ ] 16 | 17 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 18 | tools.putAll(tools_custom) // override with users custom versions/runenvs 19 | -------------------------------------------------------------------------------- /pipelines/smallRNAseq/contrasts.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix 2 | KO.vs.WT (KO-WT) ~group 3 | -------------------------------------------------------------------------------- /pipelines/smallRNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file group replicate 2 | WT_1 datasetID_WT_1 WT 1 3 | WT_2 datasetID_WT_2 WT 2 4 | KO_1 datasetID_KO_1 KO 1 5 | KO_2 datasetID_KO_2 KO 2 6 | -------------------------------------------------------------------------------- /pipelines/smallRNAseq/tools.groovy: -------------------------------------------------------------------------------- 1 | // Tools custom versions and run environments 2 | // Overrides the defaults defined in ${PIPELINE_ROOT}/config/tools.groovy 3 | // Names should match tools_defaults.keys() in ${PIPELINE_ROOT}/config/tools.groovy 4 | // 5 | // The structure of this map is: 6 | // tools_custom = [ 7 | // R : [ runenv: "lmod", version: "3.6.0" ], 8 | // <...> 9 | // samtools: [ runenv: "lmod", version: "1.9" ] 10 | // ] 11 | // 12 | // Tips: 13 | // * Indentation is important in this file. Please, use 4 spaces for indent. *NO TABS*. 
14 | load PIPELINE_ROOT + "/config/tools.groovy" // tools_defaults are specified here 15 | tools_custom = [ 16 | bowtie : [ runenv: "lmod", version: "1.3.1" ], 17 | cutadapt : [ runenv: "lmod", version: "4.0" ], 18 | kentutils : [ runenv: "lmod", version: "v385" ], 19 | subread : [ runenv: "lmod", version: "2.0.0" ] 20 | ] 21 | 22 | tools = new LinkedHashMap(tools_defaults) // create new tools map based on defaults 23 | tools.putAll(tools_custom) // override with user's custom versions/runenvs 24 | -------------------------------------------------------------------------------- /pipelines/test/README.md: -------------------------------------------------------------------------------- 1 | # test pipeline 2 | Basically does nothing, but serves as a backbone to test the NGSpipe2go modular system 3 | -------------------------------------------------------------------------------- /pipelines/test/test.pipeline.groovy: -------------------------------------------------------------------------------- 1 | PIPELINE="test" 2 | PIPELINE_VERSION="1.0" 3 | PIPELINE_ROOT="./NGSpipe2go/" // may need adjustment for some projects 4 | 5 | // essential vars 6 | PROJECT="/fsimb/imbc_home/ssayolsp/tmp/test" 7 | LOGS=PROJECT + "/logs" 8 | OUT=PROJECT + "/out" 9 | TMP=PROJECT + "/tmp" 10 | 11 | // load external things 12 | load PIPELINE_ROOT + "/config/preambles.groovy" 13 | load PIPELINE_ROOT + "/config/bpipe.config.groovy" 14 | load PIPELINE_ROOT + "/config/validate_module_params.groovy" 15 | 16 | //MAIN PIPELINE TASK 17 | test = { 18 | output.dir = OUT 19 | def branch_outdir = new File(output.dir).getName() 20 | 21 | def PREAMBLE = get_preamble(module:"test", branch:branch, branch_outdir:branch_outdir) 22 | 23 | transform("*.in") to (".out") { 24 | exec """ 25 | >&2 echo "before preamble, logs go to the screen"; 26 | ${PREAMBLE} && 27 | >&2 echo "after preamble, logs go to the corresponding file"; 28 | cat $input > $output; 29 | """ 30 | } 31 | } 32 | 33 | Bpipe.run { "%.in" * [ test ] } 34 | 35 | -------------------------------------------------------------------------------- /resources/IMB_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/resources/IMB_logo.png -------------------------------------------------------------------------------- /resources/MARS-Seq_protocol_Step-by-Step_MML.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/resources/MARS-Seq_protocol_Step-by-Step_MML.pdf -------------------------------------------------------------------------------- /resources/NGSpipe2go_scheme.old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/resources/NGSpipe2go_scheme.old.png -------------------------------------------------------------------------------- /resources/NGSpipe2go_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/resources/NGSpipe2go_scheme.png -------------------------------------------------------------------------------- /testdata/ChIPseq/README.Rmd: -------------------------------------------------------------------------------- 1 | Obtained from the `chipseqDB`
Bioconductor [package](http://bioconductor.org/packages/release/workflows/html/chipseqDB.html) 2 | Five NF-YA (nuclear transcription factor Y subunit alpha) ChIP-seq libraries, with two biological replicates for murine terminal neurons, two replicates for embryonic stem cells and one input control. This uses single-end sequencing data from accession GSE25532 of the NCBI Gene Expression Omnibus. 3 | 4 | 5 | ```{r} 6 | if(!require(chipseqDBData)) { 7 | BiocManager::install("chipseqDBData") 8 | require(chipseqDBData) 9 | } 10 | 11 | nfyadata <- NFYAData() 12 | nfyadata$name <- gsub("[- ]", "_", gsub("[\\(\\)]", "", nfyadata$Description)) 13 | nfyadata$path <- sapply(nfyadata$Path, function(x) x$path) 14 | cat("mkdir ./rawdata", file="download_rawdata.sh", fill=TRUE, append=FALSE) 15 | cat("module load samtools", file="download_rawdata.sh", fill=TRUE, append=TRUE) 16 | Map(name=nfyadata$name, path=nfyadata$path, function(name, path) { 17 | cat("samtools bam2fq ", path, " | /fsimb/common/tools/pigz/2.4/pigz -p8 > ./rawdata/", name, ".fastq.gz\n", 18 | file="download_rawdata.sh", sep="", append=TRUE) 19 | }) 20 | ``` 21 | 22 | -------------------------------------------------------------------------------- /testdata/ChIPseq/contrasts_diffbind.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix sub_experiment 2 | NF_YA_ESCvsNF_YA_TN (NF_YA_ESC-NF_YA_TN) ~group 1 3 | -------------------------------------------------------------------------------- /testdata/ChIPseq/targets.txt: -------------------------------------------------------------------------------- 1 | IP IPname INPUT INPUTname group Replicate PeakCaller 2 | NF_YA_ESC_1 NF_YA_ESC_1 Input Input NF_YA_ESC 1 macs 3 | NF_YA_ESC_2 NF_YA_ESC_2 Input Input NF_YA_ESC 2 macs 4 | NF_YA_TN_1 NF_YA_TN_1 Input Input NF_YA_TN 1 macs 5 | NF_YA_TN_2 NF_YA_TN_2 Input Input NF_YA_TN 2 macs 6 | -------------------------------------------------------------------------------- /testdata/DNASeq/README.md: -------------------------------------------------------------------------------- 1 | Run this bash code to generate 5M 101bp PE reads for a reference (e.g. yeast), 2 replicates x 2 conditions. 2 | 3 | ```sh 4 | #!/bin/bash 5 | # 6 | set -euo pipefail 7 | 8 | export READ_LEN=101 9 | export FRAG_LEN=500 10 | export NUM_READS=5000000 11 | export ERR_RATE=0.001 12 | export MUT_RATE=0.0001 13 | export INDEL_RATE=0.15 14 | export INDEL_EXTEND_RATE=0.3 15 | 16 | export REF="/fsimb/groups/imb-bioinfocf/common-data/GATK_resources/gatk_bundle_hg38_v0/Homo_sapiens_assembly38.fasta" 17 | export RAWDATA="./rawdata" 18 | 19 | CORES=4 20 | 21 | function f { 22 | REF=$1 23 | BASE=$2 24 | REPL=${RAWDATA}/${BASE}_$3 25 | SEED=$3 26 | 27 | echo "replicate $REPL using ref $REF" 28 | wgsim \ 29 | -1${READ_LEN} \ 30 | -2${READ_LEN} \ 31 | -d${FRAG_LEN} \ 32 | -N${NUM_READS} \ 33 | -e${ERR_RATE} \ 34 | -r${MUT_RATE} \ 35 | -R${INDEL_RATE} \ 36 | -X${INDEL_EXTEND_RATE} \ 37 | -S${SEED} \ 38 | ${REF} ${REPL}.R1.fastq ${REPL}.R2.fastq | gzip > ${REPL}_sim.txt.gz 39 | 40 | gzip ${REPL}.R1.fastq ${REPL}.R2.fastq 41 | } 42 | export -f f 43 | 44 | # Generate 2 replicates for 2 conditions. Both from the same reference, thus they'll have no differences.
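# note: with --xapply, GNU parallel pairs the input sources element-wise and
# recycles the shorter ones, so the call below launches four jobs
# (control_1, treated_1, control_2, treated_2); the seed {3} doubles as the
# replicate number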
45 | # Use a modified reference for treated, if you want to introduce changes 46 | parallel --xapply -j $CORES "f {1} {2} {3}" ::: "$REF" ::: control treated ::: 1 1 2 2 47 | ``` 48 | -------------------------------------------------------------------------------- /testdata/RNAseq/README.Rmd: -------------------------------------------------------------------------------- 1 | Obtained from the `yeastRNASeq` Bioconductor [package](https://bioconductor.org/packages/release/data/experiment/html/yeastRNASeq.html) 2 | The subset of the data contained in this package is 3 | from a wild-type and a single mutant yeast. For each condition (mutant, 4 | wild-type) there are two lanes' worth of data, each lane containing a sample of 5 | 500,000 raw (unaligned) reads. 6 | 7 | ```{r} 8 | if(!require(yeastRNASeq)) { 9 | BiocManager::install("yeastRNASeq") 10 | require(yeastRNASeq) 11 | } 12 | 13 | files <- list.files(file.path(system.file(package = "yeastRNASeq"), "reads"), full.names=TRUE) 14 | files.fastq.gz <- files[grepl("\\.fastq\\.gz$", files)] 15 | dir.create("./rawdata", showWarnings=FALSE) 16 | sapply(files.fastq.gz, file.copy, to="./rawdata/") 17 | ``` 18 | 19 | In order to execute the pipeline on test data, you may follow the steps described [here](https://gitlab.rlp.net/imbforge/NGSpipe2go). However, you would have to create symlinks for the files located in the testdata folder, like 20 | 21 | ln -s NGSpipe2go/testdata/RNAseq/* . 22 | ln -s NGSpipe2go/pipelines/RNAseq/rnaseq.pipeline.groovy . 23 | 24 | Please make the following modifications in order for the test run to work: 25 | 26 | - Change the _ESSENTIAL_PROJECT_ variable in the _essential.vars.groovy_ file 27 | - Adjust the _rnaseq.pipeline.groovy_ file to reflect the location of the correct _essential.vars.groovy_ file 28 | - Adjust the _target_ variable in the _shinyreports.header_ file located in the NGSpipe2go/modules/RNAseq/ folder 29 | 30 | Now you should be able to run the pipeline successfully.
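For reference, a typical launch could then look like this (a sketch assuming Bpipe is available on your PATH; the exact invocation may differ for your setup):

```sh
# start the test run on the raw FASTQ files from within the project folder
bpipe run rnaseq.pipeline.groovy rawdata/*.fastq.gz
```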
31 | -------------------------------------------------------------------------------- /testdata/RNAseq/contrasts.txt: -------------------------------------------------------------------------------- 1 | contrast.name contrast mmatrix 2 | mut.vs.wt (mut-wt) ~group 3 | -------------------------------------------------------------------------------- /testdata/RNAseq/targets.txt: -------------------------------------------------------------------------------- 1 | sample file group replicate 2 | mut_1 mut_1.readcounts.tsv mut 1 3 | mut_2 mut_2.readcounts.tsv mut 2 4 | wt_1 wt_1.readcounts.tsv wt 1 5 | wt_2 wt_2.readcounts.tsv wt 2 6 | -------------------------------------------------------------------------------- /testdata/RNAseqVariantCalling/essential.vars.groovy: -------------------------------------------------------------------------------- 1 | //Pipeline GATK RNA-seq variant calling 2 | ESSENTIAL_PROJECT="/tmp/ngspipe2go_rnaseqvariantcalling_test" 3 | ESSENTIAL_STAR_REF="/tmp/ngspipe2go_rnaseqvariantcalling_test/ref/" 4 | ESSENTIAL_GENOME_REF="/tmp/ngspipe2go_rnaseqvariantcalling_test/ref/ref.fa" 5 | ESSENTIAL_VCF_REF="/tmp/ngspipe2go_rnaseqvariantcalling_test/knowVariants.vcf" 6 | ESSENTIAL_READLENGTH=101 7 | ESSENTIAL_THREADS=4 8 | 9 | //global vars that will be reused in some global vars 10 | PROJECT=ESSENTIAL_PROJECT 11 | LOGS=PROJECT + "/logs" 12 | MAPPED=PROJECT + "/mapped" 13 | QC=PROJECT + "/qc" 14 | REPORTS=PROJECT + "/reports" 15 | RESULTS=PROJECT + "/results" 16 | TMP=PROJECT + "/tmp" 17 | TRACKS=MAPPED + "/tracks" 18 | 19 | // optional pipeline stages to include 20 | -------------------------------------------------------------------------------- /testdata/RNAseqVariantCalling/knowVariants.vcf.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/imbforge/NGSpipe2go/46d835b6e80c92296a9338addb2215f6b26ed487/testdata/RNAseqVariantCalling/knowVariants.vcf.idx -------------------------------------------------------------------------------- /testdata/smallRNAseq_BCF_miRNA/miRNA.essential.vars.groovy: -------------------------------------------------------------------------------- 1 | //Pipeline generated with command line: ./imb-pip.pl --tasks-pip=1 --force 2 | //By: ssayolsp At: Fr 17 Okt 2014 17:12:41 CEST 3 | // 4 | // REMEMBER TO CHANGE THESE ESSENTIAL VARS!! 5 | // 6 | ESSENTIAL_PROJECT="./" 7 | ESSENTIAL_BOWTIE_REF="./ref/mmu" 8 | ESSENTIAL_GENOME_REF="./ref/mmu.fa" 9 | 10 | ESSENTIAL_GENESGTF="./ref/mmu.gtf" 11 | ESSENTIAL_RRNA_BOWTIE_REF="./ref/rrna" 12 | 13 | ESSENTIAL_SPECIES="Mouse" // necessary for miRDeep2; species name as used by UCSC 14 | ESSENTIAL_SAMPLE_PREFIX="" 15 | ESSENTIAL_FEATURETYPE="gene_biotype" //gencode uses gene_type; ensembl uses gene_biotype 16 | ESSENTIAL_PAIRED="no" // paired end design 17 | ESSENTIAL_STRANDED="yes" // strandness: no|yes|reverse 18 | ESSENTIAL_THREADS=4 // number of threads for parallel tasks 19 | 20 | ESSENTIAL_READLENGTH=51 // actual read length in original raw data (incl.
insert, UMIs, adapter) 21 | ESSENTIAL_MINADAPTEROVERLAP=5 // minimal overlap with adapter 22 | ESSENTIAL_MINREADLENGTH=15 // remaining read length plus UMIs (2x4) 23 | ESSENTIAL_UMI_LENGTH=4 // (2x4bp) 24 | ESSENTIAL_ADAPTER_SEQUENCE="TGGAATTCTCGGGTGCCAAGG" // needed for cutadapt adapter trimming 25 | 26 | // vars for mirDeep2 27 | ESSENTIAL_MATURE_MIRNA="./ref/mature.fa" 28 | ESSENTIAL_HAIRPIN_MIRNA="./ref/hairpin.fa" 29 | 30 | //global vars that will be reused in some global vars 31 | PROJECT=ESSENTIAL_PROJECT 32 | LOGS=PROJECT + "/logs" 33 | QC=PROJECT + "/qc" 34 | REPORTS=PROJECT + "/reports" 35 | RESULTS=PROJECT + "/results" 36 | PROCESSED=PROJECT + "/rawdata_processed" 37 | MAPPED=PROJECT + "/mapped" 38 | TMP=PROJECT + "/tmp" 39 | TRACKS=PROJECT + "/tracks" 40 | 41 | // optional pipeline stages to include 42 | 43 | -------------------------------------------------------------------------------- /testdata/smallRNAseq_BCF_smallrnaseq/README.md: -------------------------------------------------------------------------------- 1 | Get mouse microRNA experiment from [GSE57138](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE57138). 2 | Groenendyk J, Fan X, Peng Z, Ilnytskyy Y et al. Genome-wide analysis of thapsigargin-induced microRNAs and their targets in NIH3T3 cells. Genom Data 2014 Dec;2:325-7. PMID: 26484121 3 | 4 | ```bash 5 | ml sratoolkit bowtie 6 | 7 | # get rawdata 8 | mkdir rawdata 9 | parallel --xapply -j4 "fastq-dump --stdout {1} | gzip > rawdata/{2}.fastq.gz" ::: SRR1269676 SRR1269677 SRR1269678 SRR1269679 ::: control1 control2 thapsigargin1 thapsigargin2 10 | 11 | # get mouse reference & annotation 12 | mkdir ref 13 | wget -qO- ftp://ftp.ensembl.org/pub/release-98/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.chromosome.19.fa.gz | gzip -cd > ref/mmu_chr19.fa 14 | bowtie-build --threads 8 ref/mmu_chr19.fa ref/mmu_chr19 15 | wget -qO- ftp://ftp.ensembl.org/pub/release-98/gtf/mus_musculus/Mus_musculus.GRCm38.98.gtf.gz | gzip -cd | grep "^19" > ref/mmu_chr19.gtf 16 | 17 | # rrna reference 18 | Rscript -e 'biomaRt::exportFASTA(biomaRt::getBM(filters="biotype", values="rRNA", attributes=c("gene_exon_intron", "ensembl_gene_id"), mart=biomaRt::useEnsembl("ensembl", dataset="mmusculus_gene_ensembl")), "ref/rrna.fa")' 19 | bowtie-build --threads 8 ref/rrna.fa ref/rrna 20 | ``` 21 | -------------------------------------------------------------------------------- /testdata/smallRNAseq_BCF_smallrnaseq/smallrnaseq.essential.vars.groovy: -------------------------------------------------------------------------------- 1 | //Pipeline generated with command line: ./imb-pip.pl --tasks-pip=1 --force 2 | //By: ssayolsp At: Fr 17 Okt 2014 17:12:41 CEST 3 | // 4 | // REMEMBER TO CHANGE THESE ESSENTIAL VARS!! 5 | // 6 | ESSENTIAL_PROJECT="./" 7 | ESSENTIAL_BOWTIE_REF="./ref/mmu_chr19" 8 | ESSENTIAL_GENOME_REF="./ref/mmu_chr19.fa" 9 | 10 | ESSENTIAL_GENESGTF="./ref/mmu_chr19.gtf" 11 | ESSENTIAL_RRNA_BOWTIE_REF="./ref/rrna" 12 | 13 | ESSENTIAL_SPECIES="Mouse" // necessary for miRDeep2 and fastqscreen 14 | ESSENTIAL_SAMPLE_PREFIX="" 15 | ESSENTIAL_FEATURETYPE="gene_biotype" //gencode uses gene_type; ensembl uses gene_biotype 16 | ESSENTIAL_PAIRED="no" // paired end design 17 | ESSENTIAL_STRANDED="yes" // strandness: no|yes|reverse 18 | ESSENTIAL_THREADS=4 // number of threads for parallel tasks 19 | 20 | ESSENTIAL_READLENGTH=51 // actual read length in original raw data (incl.
insert, UMIs, adapter) 21 | ESSENTIAL_MINADAPTEROVERLAP=5 // minimal overlap with adapter 22 | ESSENTIAL_MINREADLENGTH=26 // remaining read length plus UMIs (2x4) 23 | ESSENTIAL_UMI_LENGTH=8 // (2x4bp) 24 | ESSENTIAL_ADAPTER_SEQUENCE="TGGAATTCTCGGGTGCCAAGG" // needed for cutadapt adapter trimming 25 | 26 | //global vars that will be reused in some global vars 27 | PROJECT=ESSENTIAL_PROJECT 28 | LOGS=PROJECT + "/logs" 29 | QC=PROJECT + "/qc" 30 | REPORTS=PROJECT + "/reports" 31 | RESULTS=PROJECT + "/results" 32 | PROCESSED=PROJECT + "/rawdata_processed" 33 | MAPPED=PROJECT + "/mapped" 34 | TMP=PROJECT + "/tmp" 35 | TRACKS=PROJECT + "/tracks" 36 | 37 | // optional pipeline stages to include 38 | 39 | -------------------------------------------------------------------------------- /testdata/tenx_ATAC/README.md: -------------------------------------------------------------------------------- 1 | Use the test data from the 10X Genomics `cellranger-atac` program. 2 | 3 | The tiny reference and sample FASTQ data can be found in the `external/atac_testrun_ref` and `external/cellranger_atac_tiny_fastq` subfolders of the installation folder (as of v2.0.0). 4 | 5 | The sample FASTQs should be placed in the `rawdata` subfolder of the test project folder, and the `essential.vars.groovy` file edited to point to the relevant reference and raw data folders. 6 | 7 | Note that the genes GTF file is compressed with cellranger-atac (`genes/genes.gtf.gz`). This does not matter, as no NGSpipe2go module uses it in this pipeline. 8 | 9 | -------------------------------------------------------------------------------- /testdata/tenx_GEX/README.md: -------------------------------------------------------------------------------- 1 | Use the test data from the 10X Genomics `cellranger` program. 2 | 3 | The tiny reference and sample FASTQ data can be found in the `external/cellranger_tiny_ref` and `external/cellranger_tiny_fastq` subfolders of the installation folder (as of v6.0.0). 4 | 5 | The sample FASTQs should be placed in the `rawdata` subfolder of the test project folder, and the `essential.vars.groovy` file edited to point to the relevant reference and raw data folders. 6 | 7 | Note that for the test data associated with cellranger (v6.0.0), the `ESSENTIAL_FEATURETYPE` variable should be set to the (ENSEMBL-associated) `gene_biotype`, not the (GENCODE-associated) `gene_type`. The full human and mouse 10X datasets, as well as the test datasets for cellranger-atac and cellranger-arc, use the GENCODE-style `gene_type`. 8 | 9 | -------------------------------------------------------------------------------- /testdata/tenx_multiome/README.md: -------------------------------------------------------------------------------- 1 | Use the test data from the 10X Genomics `cellranger-arc` program. 2 | 3 | The tiny reference and sample FASTQ data can be found in the `external/arc_testrun_files` subfolder of the installation folder (as of v2.0.0). 4 | 5 | The sample FASTQs should be placed in the `rawdata` subfolder of the test project folder, and the `essential.vars.groovy` file edited to point to the relevant reference and raw data folders. 6 | 7 | Note that the genes GTF file is compressed with cellranger-arc (`genes/genes.gtf.gz`). The `qualimap` tool requires uncompressed GTF files, so an uncompressed version needs to be available for it, and this is the one that should be set in the `essential.vars.groovy` file. The `geneBodyCov2` tool also uses the GTF file, but it can read the compressed version as well.
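If needed, the uncompressed copy for `qualimap` can be created next to the compressed one, e.g. (assuming the cellranger-arc reference layout described above):

```sh
# decompress the annotation once, keeping the original .gz in place
gzip -cd genes/genes.gtf.gz > genes/genes.gtf
```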
8 | 9 | -------------------------------------------------------------------------------- /tools/breaktag/umi_filtering.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import csv 5 | 6 | filename = sys.argv[1] 7 | 8 | with open(filename, 'r') as f: 9 | reader = csv.reader(f) 10 | data = list(reader) 11 | 12 | # GROUP TOGETHER CONSECUTIVE IDENTICAL UMIS, ASSUMING THEIR PROXIMITY 13 | # columns (inferred from usage below): 0=chrom, 1=start, 2=end, 3=strand, 4=UMI, 5=count 14 | row_old = data[0] 15 | data_aggregated_by_umi_identity = [row_old] 16 | for row in data[1:]: 17 | if (row_old[0]==row[0] and row_old[3]==row[3] and row_old[4]==row[4]): 18 | new_count, old_count = int(row[5]), int(row_old[5]) 19 | row[5] = str(new_count + old_count) # sum up the counts 20 | if new_count < old_count: 21 | row[1:3] = row_old[1:3] # keep the coordinates of the more abundant read 22 | del data_aggregated_by_umi_identity[-1] 23 | data_aggregated_by_umi_identity.append(row) 24 | row_old = row 25 | 26 | # GROUP TOGETHER CLOSE SPATIAL CONSECUTIVE READS WHOSE UMI DIFFERS AT MOST BY 2 MISMATCHES 27 | row_old = data_aggregated_by_umi_identity[0] 28 | data_aggregated_by_umi_similarity = [row_old] 29 | space_gap = 30 30 | mm_gap = 2 31 | for row in data_aggregated_by_umi_identity[1:]: 32 | s1 = row_old[4] 33 | s2 = row[4] 34 | numb_mismatches = sum(c1!=c2 for c1,c2 in zip(s1,s2)) 35 | dist = abs(int(row[1])-int(row_old[1])) 36 | if (row_old[0]==row[0] and dist<=space_gap and row_old[3]==row[3] and numb_mismatches<=mm_gap): 37 | new_count, old_count = int(row[5]), int(row_old[5]) 38 | row[5] = str(new_count + old_count) # sum up the counts 39 | if new_count < old_count: 40 | row[1:3] = row_old[1:3] # keep coordinates and UMI of the more abundant read 41 | row[4] = row_old[4] 42 | del data_aggregated_by_umi_similarity[-1] 43 | data_aggregated_by_umi_similarity.append(row) 44 | row_old = row 45 | 46 | for item in data_aggregated_by_umi_similarity: 47 | print('\t'.join(item)) 48 | -------------------------------------------------------------------------------- /tools/collectBpipeLogs/collectBpipeLogs.sh: -------------------------------------------------------------------------------- 1 | PROJECT=$1 2 | LOGS=$2 3 | 4 | # iterate through .bpipe outputs to pick up the run IDs of all finished tasks 5 | # copy all log files of the finished tasks out of the commandtmp folder 6 | for f in ${PROJECT}/.bpipe/outputs/*; 7 | do 8 | echo $f 9 | F=$(basename $f) 10 | #echo "FILEname " $F 11 | JOB=$(echo $F | cut -d. -f1) 12 | #echo "JOB " ${JOB} 13 | ID=$(grep -E "^commandId" $f | cut -d= -f2) 14 | #echo "ID " $ID 15 | FILE=$(grep -E "^outputFile" $f|cut -d= -f2) 16 | FILE=$(basename ${FILE}) 17 | echo "JOB: ${JOB}, ID: ${ID}, FILE: ${FILE}, DIR: ${PROJECT}" 18 | if [ ! -d "${LOGS}/${JOB}" ]; 19 | then 20 | echo "mkdir ${LOGS}/${JOB}" 21 | mkdir -p ${LOGS}/${JOB} 22 | fi 23 | 24 | if [ -e ${PROJECT}/.bpipe/commandtmp/${ID}/${ID}.err ]; 25 | then 26 | echo "${PROJECT}/.bpipe/commandtmp/${ID}/${ID}.err --> ${LOGS}/${JOB}/${FILE}.log" 27 | cp -v ${PROJECT}/.bpipe/commandtmp/${ID}/${ID}.err ${LOGS}/${JOB}/${FILE}.log 28 | # cp -v ${PROJECT}/.bpipe/commandtmp/${ID}/${ID}.out ${LOGS}/${JOB}/${FILE}.out 29 | fi 30 | done 31 | -------------------------------------------------------------------------------- /tools/dedup/remove_duplicates_with_stats.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | input=${1} 4 | output=${input/.highQ.fastq.gz/.deduped_barcoded.fastq.gz} 5 | highQ=${input%.highQ.fastq.gz}".highQ" 6 | unique=${input%.highQ.fastq.gz}".unique" 7 | 8 | zcat ${input} | paste -d, - - - - | tee >(awk -v var="$highQ" 'END {print NR,var}' >> dedup.stats.txt) | sort -u -t, -k2,2 | tee >(awk -v var="$unique" 'END {print NR,var}' >> dedup.stats.txt) | tr ',' '\n' | gzip > ${output} 9 | -------------------------------------------------------------------------------- /tools/reports/shiny_smallrnaseq_reporting_tool/README: -------------------------------------------------------------------------------- 1 | 2 | The analysis of mature miRNA needs a separate report file, since for mature miRNAs 3 | a different gtf (gff3) file is used, which requires a different treatment from the 4 | other analyses. 5 | 6 | --------------------------------------------------------------------------------