├── .coveragerc
├── envs
    ├── qc.yaml
    ├── bbduk.yaml
    ├── salmon.yaml
    ├── kallisto.yaml
    ├── fastqc.yaml
    ├── zip.yaml
    ├── star.yaml
    ├── trimgalore.yaml
    ├── bbmap.yaml
    ├── fastp.yaml
    ├── picard.yaml
    ├── ciri2.yaml
    ├── cutadapt.yaml
    ├── dorado.yaml
    ├── bwa.yaml
    ├── bwa2.yaml
    ├── samtools.yaml
    ├── minimap.yaml
    ├── piranha.yaml
    ├── index.yaml
    ├── macs.yaml
    ├── segemehl3.yaml
    ├── umitools.yaml
    ├── segemehl.yaml
    ├── bwameth.yaml
    ├── sra.yaml
    ├── trimmomatic.yaml
    ├── trnascan.yaml
    ├── hisat2.yaml
    ├── bedtools.yaml
    ├── scyphy.yaml
    ├── ucsc.yaml
    ├── trimm.yaml
    ├── base.yaml
    ├── guppy.yaml
    ├── diego_DAS.yaml
    ├── countreads.yaml
    ├── countreads_de.yaml
    ├── summary.yaml
    ├── monsda.yaml
    ├── edger_DAS.yaml
    ├── edger_DEU.yaml
    ├── perl.yaml
    ├── drimseq_DTU.yaml
    ├── dexseq_DTU.yaml
    └── isoformswitchanalyzer.yaml
├── scripts
    ├── lib
    │   ├── __init.py__
    │   ├── _lib.R
    │   └── Logger.py
    ├── Preprocessing
    │   └── indexfa.sh
    ├── Shells
    │   ├── printFQEnds.sh
    │   ├── Sam2Bed.sh
    │   ├── Sam2Bam.sh
    │   ├── printEnds.sh
    │   ├── NonUniqueBam_woPicard.sh
    │   ├── UniqueBam_woPicard.sh
    │   ├── MergeGeneExpression_Cufflinks.sh
    │   ├── MergeExpression_RNAcounter.sh
    │   ├── UniqueSam_woPicard.sh
    │   └── MergeExpression_Cufflinks.sh
    ├── Analysis
    │   ├── GettRNAExpression.sh
    │   ├── SUMMARY
    │   │   └── header_summary.Rmd
    │   ├── CountFastqEnds.pl
    │   ├── DAS
    │   │   └── FeatureCounts2DIEGO.pl
    │   ├── PreprocessPeaks.pl
    │   ├── GOA.R
    │   └── AddStructure.py
    └── Universal
    │   ├── sam2fastq.pl
    │   └── countCCA.pl
├── workflows
    ├── unlock.smk
    ├── footer.smk
    ├── footer.nf
    ├── collect.nf
    ├── summary.smk
    ├── fastqc_raw.nf
    ├── multiqc.nf
    ├── picard_dedup.smk
    ├── dorado.smk
    ├── summary.nf
    ├── premultiqc.nf
    ├── simulatetrim.smk
    ├── dorado.nf
    ├── premultiqc.smk
    ├── guppy.smk
    ├── header.nf
    ├── ciri2.smk
    ├── umitools_dedup.nf
    ├── simulatetrim.nf
    ├── guppy.nf
    ├── picard_dedup.nf
    ├── wip
    │   └── pycoqc.smk
    ├── trimgalore.nf
    ├── ciri2.nf
    ├── sra.nf
    ├── manipulate_genome.smk
    ├── fastqc_dedup.nf
    ├── fastqc_trim.nf
    ├── cutadapt.nf
    ├── bbduk.nf
    ├── fastp.nf
    ├── manipulate_genome.nf
    ├── bbduk.smk
    ├── mapping.smk
    ├── fastp.smk
    ├── fastqc_raw.smk
    ├── cutadapt.smk
    ├── minimap.smk
    ├── segemehl.smk
    ├── bwameth.smk
    ├── fastqc_dedup_trim.nf
    ├── trimgalore.smk
    ├── bwa2.smk
    ├── salmon.smk
    ├── kallisto.smk
    ├── segemehl3_bisulfite.smk
    └── fastqc.nf
├── .gitattributes
├── MONSDA
    ├── __main__.py
    ├── __init__.py
    └── lib
    │   └── Collection.groovy
├── MANIFEST.in
├── requirements.txt
├── docs
    ├── source
    │   ├── _static
    │   │   └── css
    │   │   │   └── custom.css
    │   ├── integrate.rst
    │   ├── contribute.rst
    │   ├── cluster.rst
    │   ├── installation.rst
    │   ├── runsmk.rst
    │   ├── wrapper.rst
    │   ├── first.rst
    │   └── conditiontree.rst
    ├── requirements.txt
    ├── Makefile
    ├── make.bat
    ├── index.rst
    └── conf.py
├── profile_snakemake
    ├── slurm-jobscript.sh
    ├── config.yaml
    ├── cluster_config.yaml
    ├── slurm-submit.py
    └── slurm-status.py
├── tests
    ├── TestCondaEnvs.sh
    ├── test_nextflow.sh
    ├── test_snakemake.sh
    ├── manual_test.sh
    └── test_Utils.py
├── profile_nextflow
    └── nextflow.config
├── .readthedocs.yaml
├── .vscode
    └── settings.json
├── setup.cfg
├── configs
    └── tutorial_quick.json
├── pyproject.toml
├── .gitignore
└── setup.py

/.coveragerc:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/envs/qc.yaml:
--------------------------------------------------------------------------------
1 | fastqc.yaml
--------------------------------------------------------------------------------
/scripts/lib/__init.py__: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /envs/bbduk.yaml: -------------------------------------------------------------------------------- 1 | bbmap.yaml -------------------------------------------------------------------------------- /workflows/unlock.smk: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | NextSnakes/_version.py export-subst 2 | -------------------------------------------------------------------------------- /MONSDA/__main__.py: -------------------------------------------------------------------------------- 1 | from MONSDA import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include MONSDA/_version.py 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | --index-url https://pypi.python.org/simple/ 2 | 3 | -e . 4 | 5 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .tight-table td{ 2 | white-space: normal !important; 3 | } -------------------------------------------------------------------------------- /MONSDA/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import _version 2 | 3 | __version__ = _version.get_versions()["version"] 4 | -------------------------------------------------------------------------------- /profile_snakemake/slurm-jobscript.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # properties = {properties} 3 | {exec_job} 4 | -------------------------------------------------------------------------------- /envs/salmon.yaml: -------------------------------------------------------------------------------- 1 | name: salmon 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - salmon =1.10.3 -------------------------------------------------------------------------------- /envs/kallisto.yaml: -------------------------------------------------------------------------------- 1 | name: kallisto 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - kallisto =0.51.0 -------------------------------------------------------------------------------- /workflows/footer.smk: -------------------------------------------------------------------------------- 1 | onsuccess: 2 | print("Workflow finished, no error") 3 | onerror: 4 | print("ERROR: "+str({log})) 5 | -------------------------------------------------------------------------------- /envs/fastqc.yaml: -------------------------------------------------------------------------------- 1 | name: qc 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | dependencies: 6 | - multiqc =1.21 7 | - fastqc =0.12.1 8 | -------------------------------------------------------------------------------- /workflows/footer.nf: -------------------------------------------------------------------------------- 1 | workflow.onComplete { 2 | script: 3 | """ 4 | echo 'Workflow finished, no error' 5 | """ 6 | } 7 | -------------------------------------------------------------------------------- /envs/zip.yaml: -------------------------------------------------------------------------------- 1 | name: zip 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - pigz =2.6 8 | - zlib =1.2.11 -------------------------------------------------------------------------------- /envs/star.yaml: -------------------------------------------------------------------------------- 1 | name: star 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - samtools =1.16.1 9 | - star =2.7.10b -------------------------------------------------------------------------------- /envs/trimgalore.yaml: -------------------------------------------------------------------------------- 1 | name: trimgalore 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - trim-galore =0.6.7 8 | - rename =1.601 -------------------------------------------------------------------------------- /envs/bbmap.yaml: -------------------------------------------------------------------------------- 1 | name: bbmap 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - bbmap =39.01 8 | - rename =1.601 9 | 10 | -------------------------------------------------------------------------------- /envs/fastp.yaml: -------------------------------------------------------------------------------- 1 | name: fastp 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - fastp =0.23.4 8 | - rename =1.601 9 | 10 | -------------------------------------------------------------------------------- /envs/picard.yaml: 
-------------------------------------------------------------------------------- 1 | name: picardtools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - picard =2.27.4 9 | - samtools =1.16.1 -------------------------------------------------------------------------------- /tests/TestCondaEnvs.sh: -------------------------------------------------------------------------------- 1 | for i in ~/MONSDA/envs/*.yaml;do rm -rf ~/anaconda3/envs/tempenv;echo "INSTALLING $i" && conda env create -n tempenv -f $i && echo "DONE, NEXT";done 2 | -------------------------------------------------------------------------------- /envs/ciri2.yaml: -------------------------------------------------------------------------------- 1 | name: ciri2 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - perl =5.32.0 9 | - samtools =1.16.1 10 | -------------------------------------------------------------------------------- /envs/cutadapt.yaml: -------------------------------------------------------------------------------- 1 | name: cutadapt 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - cutadapt =4.1 8 | - rename =1.601 9 | 10 | -------------------------------------------------------------------------------- /envs/dorado.yaml: -------------------------------------------------------------------------------- 1 | name: dorado 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - samtools =1.16.1 9 | - pigz =2.6 10 | -------------------------------------------------------------------------------- /envs/bwa.yaml: -------------------------------------------------------------------------------- 1 | name: bwa 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bwa =0.7.17 9 | - pigz =2.6 10 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/bwa2.yaml: -------------------------------------------------------------------------------- 1 | name: bwa 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bwa-mem2 =2.2.1 9 | - pigz =2.6 10 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/samtools.yaml: -------------------------------------------------------------------------------- 1 | name: samtools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - samtools =1.16.1 8 | - grep =2.14 9 | - pigz =2.6 10 | -------------------------------------------------------------------------------- /tests/test_nextflow.sh: -------------------------------------------------------------------------------- 1 | DEF="INFO" 2 | LVL="${1:-$DEF}" 3 | bash cleanup.sh && export NXF_EXECUTOR=slurm; MONSDA --nextflow -j 8 --configfile multitool.json --directory ${PWD} --loglevel $LVL 4 | -------------------------------------------------------------------------------- /envs/minimap.yaml: -------------------------------------------------------------------------------- 1 | name: minimap 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - minimap2 =2.24 9 | - pigz =2.6 10 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/piranha.yaml: -------------------------------------------------------------------------------- 1 | name: piranha 2 | channels: 3 | - conda-forge 4 | - bioconda 
5 | - defaults 6 | - r 7 | dependencies: 8 | - bamtools =2.5.1 9 | - piranha =1.2.1 10 | - readline =8.2 -------------------------------------------------------------------------------- /envs/index.yaml: -------------------------------------------------------------------------------- 1 | name: index 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - samtools =1.16.1 8 | - curl =7.87.0 9 | - segemehl =0.2.0 10 | - pigz =2.6 -------------------------------------------------------------------------------- /tests/test_snakemake.sh: -------------------------------------------------------------------------------- 1 | bash cleanup.sh && MONSDA -j 8 --configfile multitool.json --directory ${PWD} --conda-frontend mamba --profile slurm --conda-frontend mamba --conda-prefix /scratch/snakemake_conda_envs 2 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinxcontrib-napoleon 3 | sphinx-argparse 4 | sphinx_rtd_theme 5 | pytest-sphinx 6 | docutils 7 | autodoc 8 | mathjax 9 | recommonmark 10 | configargparse 11 | appdirs 12 | -------------------------------------------------------------------------------- /envs/macs.yaml: -------------------------------------------------------------------------------- 1 | name: macs 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - macs2 =2.2.7.1 9 | - readline =8.2 10 | - samtools =1.16.1 11 | - sed =4.8 -------------------------------------------------------------------------------- /envs/segemehl3.yaml: -------------------------------------------------------------------------------- 1 | name: segemehl3 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - r 6 | dependencies: 7 | - grep =2.14 8 | - samtools =1.16.1 9 | - segemehl =0.3.4 10 | - pigz =2.6 11 | -------------------------------------------------------------------------------- /envs/umitools.yaml: -------------------------------------------------------------------------------- 1 | name: umitools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - samtools =1.16.1 9 | - umi_tools =1.1.2 10 | - dateutils =0.6.12 -------------------------------------------------------------------------------- /envs/segemehl.yaml: -------------------------------------------------------------------------------- 1 | name: segemehl 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - samtools =1.16.1 8 | - segemehl =0.2.0 9 | - grep =2.14 10 | - pigz =2.6 11 | -------------------------------------------------------------------------------- /envs/bwameth.yaml: -------------------------------------------------------------------------------- 1 | name: bwameth 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bwameth =0.2.7 9 | - bwa-mem2 =2.2.1 10 | - pigz =2.6 11 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/sra.yaml: -------------------------------------------------------------------------------- 1 | name: sratools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - perl =5.32.1 9 | - pigz =2.6 10 | - rename =1.601 11 | - sra-tools =2.11.0 12 | -------------------------------------------------------------------------------- /envs/trimmomatic.yaml: 
-------------------------------------------------------------------------------- 1 | name: trimmomatic 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - trimmomatic =0.39 8 | - python =3.11.0 9 | - fastqc =0.11.8 10 | - rename =1.601 -------------------------------------------------------------------------------- /envs/trnascan.yaml: -------------------------------------------------------------------------------- 1 | name: trnascan 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - r 6 | - defaults 7 | dependencies: 8 | - infernal =1.1.2 9 | - trnascan-se =2.0 10 | - perl =5.26.2 11 | - pigz =2.6 12 | -------------------------------------------------------------------------------- /envs/hisat2.yaml: -------------------------------------------------------------------------------- 1 | name: hisat2 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - hisat2 =2.2.1 9 | - samtools =1.16.1 10 | - pigz =2.6 11 | - pysam =0.19.1 12 | - python =3.9 13 | - rename =1.601 14 | -------------------------------------------------------------------------------- /envs/bedtools.yaml: -------------------------------------------------------------------------------- 1 | name: bedtools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bedtools =2.30.0 9 | - curl =7.87.0 10 | - grep =3.4 11 | - pigz =2.6 12 | - pysam =0.20.0 13 | - python =3.10.9 14 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/scyphy.yaml: -------------------------------------------------------------------------------- 1 | name: scyphy 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bzip2 =1.0.8 9 | - piranha =1.2.1 10 | - pyfaidx =0.7.1 11 | - pysam =0.19.1 12 | - python =3.9 13 | - readline =8.2 14 | - samtools =1.16.1 15 | -------------------------------------------------------------------------------- /envs/ucsc.yaml: -------------------------------------------------------------------------------- 1 | name: ucsc 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - grep =2.14 9 | - ucsc-bedgraphtobigwig =377 10 | - ucsc-beditemoverlapcount =377 11 | - ucsc-fatotwobit =377 12 | - ucsc-twobitinfo =377 13 | - pigz =2.6 14 | 15 | -------------------------------------------------------------------------------- /profile_nextflow/nextflow.config: -------------------------------------------------------------------------------- 1 | profiles { 2 | slurm { 3 | process.executor = 'slurm' 4 | process.memory = '10 GB' 5 | process.queue = 'main' 6 | withName: '_idx|_map' { 7 | memory = '160GB' 8 | } 9 | } 10 | 11 | local { 12 | process.executor = 'local' 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /envs/trimm.yaml: -------------------------------------------------------------------------------- 1 | name: trimm 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - bbmap =38.22 8 | - cutadapt =1.18 9 | - fastqc =0.11.8 10 | - trim-galore =0.5.0 11 | - perl =5.26.2 12 | - pigz =2.6 13 | - pip =18.1 14 | - python =3.9 15 | - rename =1.601 16 | -------------------------------------------------------------------------------- /envs/base.yaml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | 
dependencies: 7 | - grep >=3.4 8 | - samtools =1.16.1 9 | - natsort >=8.1.0 10 | - perl =5.32.1 11 | - picard =2.27.4 12 | - pigz =2.6 13 | - python =3.10.4 14 | - python_abi =3.10 15 | - pyfaidx =0.5.9 16 | - pysam =0.20.0 -------------------------------------------------------------------------------- /scripts/Preprocessing/indexfa.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | inf=$1 4 | 5 | if [[ -s $inf ]] 6 | then 7 | if [[ "$inf" == *.gz* ]] 8 | then 9 | filename="${inf%.*}" 10 | zcat $inf > $filename && samtools faidx $filename && rm -f $filename 11 | else 12 | samtools faidx $inf 13 | fi 14 | else 15 | touch $inf.fai 16 | fi 17 | -------------------------------------------------------------------------------- /profile_snakemake/config.yaml: -------------------------------------------------------------------------------- 1 | restart-times: 3 2 | jobscript: "slurm-jobscript.sh" 3 | cluster: "slurm-submit.py" 4 | #cluster-status: "slurm-status.py" 5 | max-jobs-per-second: 1 6 | max-status-checks-per-second: 3 7 | local-cores: 1 8 | latency-wait: 60 9 | #use-conda: True 10 | keep-going: True 11 | rerun-incomplete: True 12 | #printshellcmds: True 13 | -------------------------------------------------------------------------------- /envs/guppy.yaml: -------------------------------------------------------------------------------- 1 | name: guppy 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - gettext =0.19.8.1 9 | - libffi =3.2.1 10 | - libgcc-ng =9.3.0 11 | - libgomp =9.3.0 12 | - libiconv =1.16 13 | - libidn =7.45.0 14 | - libidn11 =1.34 15 | - libidn2 =2.3.0 16 | - libstdcxx-ng =9.3.0 17 | - libunistring =0.9.10 18 | 19 | -------------------------------------------------------------------------------- /profile_snakemake/cluster_config.yaml: -------------------------------------------------------------------------------- 1 | __default__: 2 | account: user # your account name 3 | partition: main # the partition to use 4 | time: 1500 # default time (minutes) 5 | nodes: 1 6 | output: "/SLURMLOG/{rule}.{wildcards}.out" 7 | error: "/SLURMLOG/{rule}.{wildcards}.err" 8 | #ntasks: 1 9 | #mem: 14GB # default memory 10 | 11 | generate_index: 12 | mem: 200GB 13 | 14 | mapping: 15 | mem: 200GB 16 | -------------------------------------------------------------------------------- /envs/diego_DAS.yaml: -------------------------------------------------------------------------------- 1 | name: diego 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bedtools =2.30.0 9 | - diego =0.1.2 10 | - graphviz =2.49.1 11 | - imagemagick =7.1.0_10 12 | - matplotlib-base =3.5.1 13 | - numpy =1.22.3 14 | - perl =5.32.1 15 | - python-dateutil =2.8.2 16 | - pysam =0.19.1 17 | - python =3.9 18 | - readline =8.2 19 | - samtools =1.16.1 20 | - scipy =1.8.0 -------------------------------------------------------------------------------- /scripts/Shells/printFQEnds.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | in=$1 4 | out=$2 5 | 6 | if [ ! 
-f $out ]; then 7 | echo -ne "SAMPLE\tCCA" > $out 8 | fi 9 | 10 | fn=${in#*/} 11 | 12 | echo -ne "\n$fn\t" >> $out 13 | 14 | for a in CCA;do 15 | zcat $in|perl -sae 'BEGIN{$c=0}{if($F[0] eq $e){$c+=$F[1]}}END{{print $c}}' -- -e=$a >> $out 16 | done 17 | 18 | #zcat $fn|perl -sae 'print $fn."\t".join("\t",@F)' -- -e=$fn >> $out 19 | #echo -ne "\n" >> $out 20 | -------------------------------------------------------------------------------- /envs/countreads.yaml: -------------------------------------------------------------------------------- 1 | name: countreads 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bcftools =1.8 9 | - gffutils =0.11.1 10 | - grep =2.14 11 | - htseq =2.0.2 12 | - pigz =2.6 13 | - pyfaidx =0.5.8 14 | - pyparsing =2.4.6 15 | - python =3.9 16 | - pysam =0.19.1 17 | - python-dateutil =2.8.1 18 | - readline =8.2 19 | - samtools =1.16.1 20 | - simplejson =3.17.0 21 | - subread =1.6.4 22 | -------------------------------------------------------------------------------- /envs/countreads_de.yaml: -------------------------------------------------------------------------------- 1 | name: countreads_de 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bcftools =1.8 9 | - gffutils =0.11.1 10 | - grep =2.14 11 | - htseq =2.0.2 12 | - pigz =2.6 13 | - pyfaidx =0.5.8 14 | - pyparsing =2.4.6 15 | - pysam =0.19.1 16 | - python =3.9 17 | - python-dateutil =2.8.1 18 | - readline =8.2 19 | - samtools =1.16.1 20 | - simplejson =3.17.0 21 | - subread =1.6.4 22 | -------------------------------------------------------------------------------- /scripts/Shells/Sam2Bed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | FILE=$1 3 | OUT=$(basename $FILE) 4 | 5 | if [[ "$FILE" == *.gz* ]] 6 | then 7 | zcat $FILE|grep "HWI-ST"|awk '{FS="\t";OFS="\t"}{if(and($2,16)){print $3,$4-1,$4+length($10),$1,$2,"-"} else {print $3,$4-1,$4+length($10),$1,$2,"+"}}' - > $OUT"_Unique.bed"; 8 | else 9 | cat $FILE|grep "HWI-ST"|awk '{FS="\t";OFS="\t"}{if(and($2,16)){print $3,$4-1,$4+length($10),$1,$2,"-"} else {print $3,$4-1,$4+length($10),$1,$2,"+"}}' - > $OUT"_Unique.bed"; 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /scripts/Analysis/GettRNAExpression.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | in=$1 4 | out=$2 5 | 6 | if [ ! 
-f $out ]; then 7 | echo -e "SAMPLE\ttRNA\tCount" > $out 8 | fi 9 | 10 | fn=${in#*/} 11 | 12 | zgrep -w CCA $in | perl -sae 'BEGIN{$c={}}{if($F[1] eq "seq"){@trnas=split(",",$F[6]);foreach $rna (@trnas){$c->{$rna}+=$F[3]}}}END{foreach $rna (keys %{$c}){print "$e\t$rna\t$c->{$rna}\n"}}' -- -e=$fn >> $out 13 | 14 | #awk -v e="$a" 'BEGIN{FS="\t";OFS="";c=0}{c+=$3}END{if(e != "A"){print c,"\t"}else{print c}}' >> $out 15 | -------------------------------------------------------------------------------- /scripts/Analysis/SUMMARY/header_summary.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "MONSDA SUMMARY REPORT" 3 | date: "`r Sys.Date()`" 4 | author: "`r Sys.getenv('LOGNAME')`" 5 | output: 6 | rmdformats::readthedown: 7 | code_folding: show 8 | self_contained: false 9 | thumbnails: false 10 | lightbox: false 11 | pkgdown: 12 | as_is: true 13 | params: 14 | root: '' 15 | outdir: '' 16 | --- 17 | 18 | 19 | ```{r setup, include=FALSE} 20 | library(knitr) 21 | knitr::opts_chunk$set(echo = TRUE) 22 | knitr::opts_knit$set(root.dir = params$root) 23 | ``` 24 | -------------------------------------------------------------------------------- /tests/manual_test.sh: -------------------------------------------------------------------------------- 1 | VERSION=$1 2 | #tag 3 | git tag -f v$VERSION 4 | #build 5 | rm -rf .eggs build *.egg-info dist ; nocorrect python setup.py bdist_wheel sdist 6 | #goto test dir 7 | cd ~/Work/Test/Pipi 8 | #uninstall old and install local new 9 | pip uninstall -y MONSDA && pip install ~/MONSDA/dist/MONSDA-$VERSION\-py3-none-any.whl 10 | #run snakemake 11 | clear && MONSDA -j 4 --configfile multitool.json --directory ${PWD} --conda-frontend mamba 12 | #run nextflow 13 | clear && MONSDA --nextflow -j 4 -resume --configfile multitool.json --directory ${PWD} 14 | -------------------------------------------------------------------------------- /scripts/Shells/Sam2Bam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | file=$1 ### Name of the sam file you want to convert 4 | ref=$2 ### Path to reference genome fasta file 5 | bins=$3 6 | out=$4 7 | threads=$5 8 | 9 | echo "running samtools view -bT $ref -o $out --threads $threads $file" 10 | 11 | if [ ! -f $out ];then 12 | echo "$out not found, creating new" 13 | zcat $file|samtools view -bT $ref -o $out --threads $threads - 14 | fi 15 | if [ ! -f $out".bai" ];then 16 | echo "$out.bai not found, creating new" 17 | samtools index $out 18 | fi 19 | 20 | -------------------------------------------------------------------------------- /scripts/Shells/printEnds.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | in=$1 4 | out=$2 5 | 6 | if [ ! 
-f $out ]; then
7 | echo -ne "SAMPLE\tCCACCA\tCCACC\tCCAC\tCCA\tCC\tCA\tC\tA" > $out
8 | fi
9 | 
10 | fn=${in#*/}
11 | 
12 | echo -ne "\n$fn\t" >> $out
13 | #perl -se '{$o = (split("\/",$f))[-1];print "\n".$o."\t"}' -- -f=$in >> $out
14 | 
15 | for a in CCACCA CCACC CCAC CCA CC CA C A;do
16 | zgrep -w $a $in | perl -sae 'BEGIN{$c=0}{if($F[1] eq "seq"){$c+=$F[3]}}END{if($e ne "A"){print $c."\t"}else{print $c}}' -- -e=$a >> $out
17 | done
18 | 
19 | #awk -v e="$a" 'BEGIN{FS="\t";OFS="";c=0}{c+=$3}END{if(e != "A"){print c,"\t"}else{print c}}' >> $out
20 | 
--------------------------------------------------------------------------------
/workflows/collect.nf:
--------------------------------------------------------------------------------
1 | process collect_stuff{
2 |     cpus THREADS
3 |     cache 'lenient'
4 |     //validExitStatus 0,1
5 | 
6 |     publishDir "${workflow.workDir}/../" , mode: 'link',
7 |     saveAs: {filename ->
8 |         "LOGS/COLLECT/${COMBO}/${CONDITION}/${file(filename).getName()}"
9 |     }
10 |     input:
11 |     path check
12 | 
13 | 
14 |     output:
15 |     path "collect.txt", emit: done
16 | 
17 |     script:
18 |     """
19 |     echo "$check successful!" > collect.txt
20 |     """
21 | }
22 | 
23 | workflow COLLECT{
24 |     take:
25 |     whatever
26 | 
27 |     main:
28 | 
29 |     collect_stuff(whatever.collect())
30 | 
31 | 
32 | }
33 | 
--------------------------------------------------------------------------------
/scripts/Analysis/CountFastqEnds.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | use strict;
3 | use warnings;
4 | use PerlIO::gzip;
5 | 
6 | my $in = shift; #fastq
7 | 
8 | open (SEQ, "<:gzip(autopop)", $in) or die "$!";
9 | 
10 | my $tails = ();
11 | my $all = 0;
12 | my $i = 1;
13 | 
14 | while(<SEQ>){
15 | 
16 |     chomp(my $line = $_);
17 | 
18 |     if ($i == 2){
19 |         my $tail = substr $line,-3;
20 |         $tails->{$tail}++;
21 |         $i++;
22 |     }
23 |     elsif($i==4){
24 |         $i=1;
25 |         next;
26 |     }
27 |     else{
28 |         $i++;
29 |         next;
30 |     }
31 | }
32 | 
33 | foreach my $tail (sort{$a eq $b} keys %{$tails}){
34 |     print join("\t", $tail, $tails->{$tail})."\n";
35 | }
--------------------------------------------------------------------------------
/envs/summary.yaml:
--------------------------------------------------------------------------------
1 | name: summary
2 | channels:
3 |   - conda-forge
4 |   - bioconda
5 |   - defaults
6 |   - r
7 | dependencies:
8 |   - r-base =4.2.2
9 |   - r-base64enc =0.1_3
10 |   - r-bookdown =0.30
11 |   - r-digest =0.6.30
12 |   - r-evaluate =0.18
13 |   - r-glue =1.6.2
14 |   - r-highr =0.9
15 |   - r-htmltools =0.5.3
16 |   - r-jquerylib =0.1.4
17 |   - r-jsonlite =1.8.3
18 |   - r-knitr =1.40
19 |   - r-magrittr =2.0.3
20 |   - r-markdown =1.4
21 |   - r-mime =0.12
22 |   - r-rlang =1.0.6
23 |   - r-rmarkdown =2.18
24 |   - r-rmdformats =1.0.4
25 |   - r-stringi =1.7.8
26 |   - r-stringr =1.4.1
27 |   - r-tinytex =0.42
28 |   - r-xfun =0.35
29 |   - r-yaml =2.3.6
30 |   - readline =8.2
31 |   - sed =4.8
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = .
9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/source/integrate.rst: -------------------------------------------------------------------------------- 1 | Integrating new tools/workflows 2 | ================================ 3 | 4 | In case new tools need to be integrated, please refer to similar tools already implemented or contact us in case nothing similar is available yet. Workflows should always be split in subworkflows that follow the same principle as existing subworkflows, ideally making them reusable for other workflows in the future. 5 | 6 | Tools should be easy to integrate, all that is needed is to write a tool and if applicable version specific **.smk** or **.nf** file describing input/output and command line calls as well as a fitting **conda environment yaml** file. Once these are available, they should already be usable and configurable via the **config.json** in the specific workflow section. 7 | -------------------------------------------------------------------------------- /scripts/Shells/NonUniqueBam_woPicard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | in=$1 4 | out=$2 5 | threads=$3 6 | bwa="${4:-}" 7 | 8 | samtools view -H ${in} | grep '@HD' > nhead 9 | samtools view -H ${in} | grep '@SQ' | sort -t$'\t' -k1,1 -k2,2V >> nhead 10 | samtools view -H ${in} | grep '@RG' >> nhead 11 | samtools view -H ${in} | grep '@PG' >> nhead 12 | 13 | 14 | if [[ "${in}" == *bwa* ]] || [[ -n "${bwa}" ]] 15 | then 16 | cat nhead <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@"| grep -e $'\t''XA:Z:' -e $'\t''SA:Z:') | samtools view --threads ${threads} -hb - > ${out} 17 | else 18 | cat nhead <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@" | grep -v -w -P "NH:i:0|NH:i:1|tp:A:P") | samtools view --threads ${threads} -hb - > ${out} 19 | fi 20 | 21 | rm -f nhead 22 | -------------------------------------------------------------------------------- /envs/monsda.yaml: -------------------------------------------------------------------------------- 1 | name: monsda 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - biopython=1.83 8 | - grep >=3.4 9 | - isort=5.13.2 10 | - monsda=1.2.8 11 | - natsort=8.4.0 12 | - nextflow=24.04.4 13 | - numpy=1.26.4 14 | - pandas=2.2.1 15 | - perl=5.34.0 16 | - pip>=24.0 17 | - python=3.12.2 18 | - pyyaml=6.0.1 19 | - scipy=1.12.0 20 | - snakemake=8.16.0 21 | - snakemake-executor-plugin-slurm=0.8.0 22 | - snakemake-executor-plugin-cluster-generic=1.0.9 23 | - snakemake-interface-common=1.17.2 24 | - snakemake-interface-executor-plugins=9.2.0 25 | - snakemake-interface-report-plugins=1.0.0 26 | - snakemake-interface-storage-plugins=3.2.3 27 | - snakemake-storage-plugin-s3=0.2.11 28 | - snakemake-storage-plugin-ftp=0.1.2 29 | - snakemake-storage-plugin-http=0.2.3 -------------------------------------------------------------------------------- /docs/source/contribute.rst: -------------------------------------------------------------------------------- 1 | Contribute 2 | ========== 3 | 4 | If you like this project, are missing features, 
want to contribute, or
5 | want to file bugs, please open a PR, leave an issue, or contact us directly.
6 | 
7 | To contribute new tools, feel free to adapt existing ones; there should
8 | be a number of examples available that cover implementation details
9 | for almost all sorts of standard tools. If you need to add new
10 | python/groovy functions for processing of options or parameters, add
11 | them to the corresponding file in the **lib** directory. New environments
12 | go into the **envs** directory, new subworkflows into the **workflows**
13 | directory. Do not forget to also extend the **template.json** and add some
14 | documentation before opening a pull request.
15 | 
16 | PRs always welcome.
17 | 
18 | 
19 | Contributors
20 | ------------
21 | 
22 | - Joerg Fallmann @jfallmann
23 | - Robin Goldmann @meisterL
--------------------------------------------------------------------------------
/scripts/Universal/sam2fastq.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | use strict;
3 | use warnings;
4 | use autodie;
5 | 
6 | my $in1 = shift; #bam file
7 | my $in2 = shift; #fastq
8 | 
9 | open IN1, "samtools view -h $in1 |";
10 | #open IN2, "< $in2" or die "can t open $in2\n";
11 | open(IN2, "gunzip -c $in2 |") or die "gunzip $in2: $!";
12 | 
13 | 
14 | my %hash =();
15 | while(<IN1>){
16 |     next if($_ =~ /^@/);
17 |     chomp $_;
18 | 
19 |     my @line = split(/\t/,$_);
20 |     my $ID = "@".$line[0];
21 |     $hash{$ID} = 0;
22 | }
23 | 
24 | 
25 | my @entry = ();
26 | 
27 | while(<IN2>){
28 |     chomp;
29 |     push @entry, $_;
30 | 
31 |     if (scalar(@entry) == 4) {
32 | 
33 |         my ($id, $seq, $plusLine, $qual) = @entry;
34 |         @entry = ();
35 | 
36 |         if(exists $hash{$id}){
37 |             print join ("\n", $id, $seq, $plusLine, $qual)."\n";
38 |         }
39 |     }
40 | }
--------------------------------------------------------------------------------
/workflows/summary.smk:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | logid = 'summary.smk '
3 | 
4 | outdir = "REPORTS/SUMMARY"
5 | 
6 | rule themall:
7 |     input: summary_all = expand("{outdir}/SUMMARY.html", outdir=outdir)
8 |     # summarys = expand("{dir}/SUMMARY.pdf", dir=get_summary_dirs(config))
9 | 
10 | rule make_rmd:
11 |     input: os.path.join(outdir,'summary.Rmd')
12 |     output: rules.themall.input.summary_all
13 |     # rules.themall.input.summarys
14 |     log: expand("LOGS/{outdir}/make_rmd.log", outdir=outdir)
15 |     conda: "summary.yaml"
16 |     params: outdir = outdir,
17 |             currentpath = os.path.join(os.path.dirname(os.path.realpath(os.path.abspath(inspect.getfile( inspect.currentframe() )) )),"..")
18 |     shell: "Rscript --vanilla -e \"rmarkdown::render('{input}',params=list(root='{params.currentpath}/'),output_file='{params.currentpath}/{params.outdir}/SUMMARY.html', quiet=TRUE)\" 2> {log}"
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 | 
5 | # Required
6 | version: 2
7 | 
8 | # Set the OS, Python version and other tools you might need
9 | 
10 | build:
11 |   os: ubuntu-22.04
12 |   tools:
13 |     python: "3.12"
14 |     # You can also specify other tool versions:
15 |     # nodejs: "20"
16 |     # rust: "1.70"
17 |     # golang: "1.20"
18 | 
19 | # Build documentation in the docs/ directory with Sphinx
20 | sphinx:
21 |   configuration: docs/conf.py
22 | 
# Build documentation with MkDocs 24 | #mkdocs: 25 | # configuration: mkdocs.yml 26 | 27 | # Optionally build your docs in additional formats such as PDF and ePub 28 | formats: [htmlzip, epub] 29 | 30 | # Optionally set the version of Python and requirements required to build your docs 31 | python: 32 | install: 33 | - requirements: docs/requirements.txt 34 | 35 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.nosetestsEnabled": false, 7 | "python.testing.pytestEnabled": true, 8 | "esbonio.server.enabled": true, 9 | "esbonio.sphinx.confDir": "${workspaceFolder}/docs", 10 | //"restructuredtext.linter.disabledLinters": [ 11 | // "doc8" 12 | //], 13 | "restructuredtext.linter.run": "onSave", 14 | "restructuredtext.linter.doc8.extraArgs": [ 15 | "--ignore D001" 16 | ], 17 | "cSpell.words": [ 18 | "COMPARABLES", 19 | "DEDUP", 20 | "MAXTHREADS", 21 | "subdir", 22 | "unstranded" 23 | ], 24 | "cSpell.enableFiletypes": [ 25 | "snakemake" 26 | ], 27 | "python.analysis.typeCheckingMode": "basic", 28 | "[python]": { 29 | "editor.defaultFormatter": "ms-python.black-formatter" 30 | }, 31 | "python.formatting.provider": "none" 32 | } 33 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [isort] 5 | multi_line_output=3 6 | include_trailing_comma=True 7 | force_grid_wrap=0 8 | use_parentheses=True 9 | line_length=110 10 | 11 | [flake8] 12 | ignore = E203, E266, E501, W503 13 | max-line-length = 110 14 | max-complexity = 18 15 | select = B,C,E,F,W,T4 16 | 17 | [tool:pytest] 18 | testpaths=test 19 | 20 | [versioneer] 21 | VCS = git 22 | style = pep440 23 | versionfile_source = MONSDA/_version.py 24 | versionfile_build = MONSDA/_version.py 25 | tag_prefix = v 26 | parentdir_prefix = MONSDA- 27 | 28 | [report] 29 | exclude_lines = 30 | # Have to re-enable the standard pragma 31 | pragma: no cover 32 | 33 | # Don't complain about missing debug-only code: 34 | def __repr__ 35 | if self\.debug 36 | 37 | # Don't complain if tests don't hit defensive assertion code: 38 | raise AssertionError 39 | raise NotImplementedError 
40 | 41 | # Don't complain if non-runnable code isn't run: 42 | if 0: 43 | if __name__ == .__main__.: 44 | -------------------------------------------------------------------------------- /workflows/fastqc_raw.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | process qc_raw{ 6 | conda "$QCENV"+".yaml" 7 | cpus THREADS 8 | cache 'lenient' 9 | //validExitStatus 0,1 10 | 11 | publishDir "${workflow.workDir}/../" , mode: 'link', 12 | saveAs: {filename -> 13 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 14 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 15 | else null 16 | } 17 | 18 | input: 19 | path read 20 | 21 | output: 22 | path "*.{zip,html}", emit: fastqc_results 23 | 24 | script: 25 | """ 26 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 27 | """ 28 | } 29 | 30 | workflow QC_RAW{ 31 | take: 32 | collection 33 | 34 | main: 35 | 36 | qc_raw(samples_ch) 37 | 38 | emit: 39 | qc = qc_raw.out.fastqc_results 40 | } 41 | -------------------------------------------------------------------------------- /scripts/Shells/UniqueBam_woPicard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | in=$1 4 | out=$2 5 | threads=$3 6 | specialmappers="${4:-}" 7 | 8 | samtools view -H ${in} | grep '@HD' > ${in}_head 9 | samtools view -H ${in} | grep '@SQ' | sort -t$'\t' -k1,1 -k2,2V >> ${in}_head 10 | samtools view -H ${in} | grep '@RG' >> ${in}_head 11 | samtools view -H ${in} | grep '@PG' >> ${in}_head 12 | 13 | 14 | if [[ "$1" == *bwa* ]] || [[ "$specialmappers" == *bwa* ]] 15 | then 16 | cat ${in}_head <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@"| grep -v -e $'\t''XA:Z:' -e $'\t''SA:Z:') | samtools view --threads ${threads} -hb - > ${out} 17 | elif [[ "$1" == *minimap* ]] || [[ "$specialmappers" == *minimap* ]] 18 | then 19 | cat ${in}_head <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@" | perl -wlane 'print if $F[4] >=60') | samtools view --threads ${threads} -hb - > ${out} 20 | else 21 | cat ${in}_head <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@" | grep -w -P "NH:i:1|tp:A:P") | samtools view --threads ${threads} -hb - > ${out} 22 | fi 23 | 24 | rm -f ${in}_head 25 | -------------------------------------------------------------------------------- /workflows/multiqc.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_MULTI') ?: '' 4 | 5 | process mqc{ 6 | conda "$QCENV"+".yaml" 7 | cpus THREADS 8 | cache 'lenient' 9 | //validExitStatus 0,1 10 | 11 | publishDir "${workflow.workDir}/../" , mode: 'link', 12 | saveAs: {filename -> 13 | if (filename.indexOf("zip") > 0) "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 14 | else if (filename.indexOf("html") > 0) "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 15 | else "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getName()}" 16 | } 17 | 18 | input: 19 | path others 20 | //path samples 21 | 22 | output: 23 | path "*.zip", emit: mqc 24 | path "*.html", emit: html 25 | 26 | script: 27 | """ 28 | touch $others; export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f --exclude picard 
--exclude gatk -k json -z -o \${PWD} . 29 | """ 30 | } 31 | 32 | workflow MULTIQC{ 33 | take: 34 | otherqcs 35 | 36 | main: 37 | 38 | mqc(otherqcs.collect()) 39 | 40 | emit: 41 | mqcres = mqc.out.mqc 42 | } 43 | -------------------------------------------------------------------------------- /workflows/picard_dedup.smk: -------------------------------------------------------------------------------- 1 | DEDUPBIN, DEDUPENV = env_bin_from_config(config, 'DEDUP') 2 | 3 | rule dedupbam: 4 | input: bam = "MAPPED/{combo}/{file}_mapped_{type}.bam" 5 | output: bam = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam", category="DEDUP"), 6 | bai = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam.bai", category="DEDUP"), 7 | met = report("MAPPED/{combo}/{file}_mapped_{type}_dedup_metrics.txt", category="DEDUP"), 8 | td = temp(directory("TMP/UMIDD/{combo}/{file}_{type}")) 9 | log: "LOGS/{combo}/{file}_{type}/dedupbam.log" 10 | conda: ""+DEDUPENV+".yaml" 11 | threads: 1 12 | priority: 0 # This should be done after all mapping is done 13 | params: jpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('JAVA', ""), 14 | dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""), 15 | dedup = DEDUPBIN 16 | shell: "mkdir -p {output.td} && {params.dedup} {params.jpara} MarkDuplicates --REMOVE_DUPLICATES true --ASSUME_SORT_ORDER coordinate --TMP_DIR {output.td} --INPUT {input.bam} --OUTPUT {output.bam} --METRICS_FILE {output.met} {params.dpara} 2> {log} && samtools index {output.bam} 2>> {log}" -------------------------------------------------------------------------------- /workflows/dorado.smk: -------------------------------------------------------------------------------- 1 | CALLERBIN, CALLERENV = env_bin_from_config(config,'BASECALL') 2 | 3 | wildcard_constraints: 4 | rawfile = '|'.join(SAMPLES) 5 | 6 | rule themall: 7 | input: expand("FASTQ/{rawfile}.fastq.gz", rawfile = SAMPLES) 8 | 9 | rule call_base: 10 | input: p5 = "RAW/{rawfile}.pod5" 11 | output: fq = "FASTQ/{rawfile}.fastq.gz", 12 | bam = temp("FASTQ/{rawfile}.bam") 13 | log: "LOGS/BASECALL/{rawfile}_dorado.log" 14 | conda: ""+CALLERENV+".yaml" 15 | threads: MAXTHREAD 16 | params: caller = CALLERBIN, 17 | cpara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'BASECALL', CALLERENV)['OPTIONS'].get('BASECALL', ""), 18 | cmodel = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'BASECALL', CALLERENV)['OPTIONS'].get('MODEL', ""), 19 | p5dir = lambda wildcards, input: os.path.dirname(input.p5), 20 | p5file = lambda wildcards, input: os.path.basename(input.p5), 21 | fqdir = lambda wildcards, output: os.path.dirname(output.fq) 22 | shell: "{params.caller} download --directory {params.p5dir} --model {params.cmodel} &> {log} && {params.caller} basecaller {params.cpara} {params.p5dir}/{params.cmodel} {params.p5dir}/ 2>> {log} 1> {output.bam} && samtools view -h {output.bam}|samtools fastq -n - | pigz > {output.fq}" -------------------------------------------------------------------------------- /docs/source/cluster.rst: -------------------------------------------------------------------------------- 1 | .. 
_slurm: 2 | 3 | ============ 4 | Run on Slurm 5 | ============ 6 | 7 | Snakemake 8 | ========= 9 | 10 | You can either use the slurm profile adapted from 11 | Snakemake-Profiles_ that comes with this repository, or go 12 | through the process of manually creating one, either using the cookiecutter example in the 13 | **Snakemake-Profiles** repository or on your own. You can also adapt the example that comes with this repository and execute 14 | 15 | .. _Snakemake-Profiles: https://github.com/Snakemake-Profiles/slurm 16 | 17 | .. code-block:: 18 | 19 | monsda -j ${cpus} --configfile ${config.json} --directory ${PWD} --profile ${path_to_slurm_profile} 20 | 21 | 22 | Further adaptions like grouping of jobs and advanced configs for rule 23 | based performance increase will be tackled in future releases of **MONSDA**. 24 | 25 | Nextflow 26 | ======== 27 | 28 | Cluster config for Nextflow follows the description Nextflow-Executors_ and Nextflow-Profiles_. To use **SLURM** as executor you can adapt the profile that comes with this repository and simply append 29 | 30 | .. code-block:: 31 | 32 | export NXF_EXECUTOR=slurm 33 | 34 | to the call to **MONSDA**. 35 | 36 | .. _Nextflow-Executors: https://www.Nextflow.io/docs/latest/executor.html 37 | .. _Nextflow-Profiles: https://www.Nextflow.io/docs/latest/config.html#config-profiles -------------------------------------------------------------------------------- /configs/tutorial_quick.json: -------------------------------------------------------------------------------- 1 | { 2 | "WORKFLOWS": "FETCH,MAPPING", 3 | "BINS": "", 4 | "MAXTHREADS": "4", 5 | "VERSION": "1.2.8", 6 | "SETTINGS": { 7 | "SIMPLE": { 8 | "SAMPLES": [ 9 | "SRR16324019" 10 | ], 11 | "SEQUENCING": "paired", 12 | "REFERENCE": "GENOMES/Ecoli/ecoli.fa.gz", 13 | "ANNOTATION": { 14 | "GFF": "GENOMES/Ecoli/ecoli.gff.gz", 15 | "GTF": "GENOMES/Ecoli/ecoli.gtf.gz" 16 | } 17 | } 18 | }, 19 | "FETCH": { 20 | "TOOLS" : 21 | { 22 | "sra" : "fasterq-dump" 23 | }, 24 | "SIMPLE": { 25 | "sra": { 26 | "OPTIONS": 27 | { 28 | "PREFETCH": "${HOME}/.ncbi/user-settings.mkfg", 29 | "DOWNLOAD": "" 30 | } 31 | } 32 | } 33 | }, 34 | "MAPPING": { 35 | "TOOLS": { 36 | "star": "STAR" 37 | }, 38 | "SIMPLE": { 39 | "star": { 40 | "OPTIONS": { 41 | "INDEX": "--genomeSAindexNbases 8", 42 | "MAP": "--outSAMprimaryFlag AllBestScore --outFilterMultimapNmax 20", 43 | "EXTENSION": "" 44 | } 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /workflows/summary.nf: -------------------------------------------------------------------------------- 1 | process make_rmd{ 2 | conda "summary.yaml" 3 | cpus THREADS 4 | cache 'lenient' 5 | //validExitStatus 0,1 6 | 7 | publishDir "${workflow.workDir}/../" , mode: 'link', 8 | saveAs: {filename -> 9 | if (filename == 'SUMMARY.html') "REPORTS/SUMMARY/SUMMARY.html" 10 | else if (filename.indexOf("log") > 0) "LOGS/REPORTS/SUMMARY/make_rmd.log" 11 | } 12 | 13 | input: 14 | path figs 15 | path tables 16 | 17 | output: 18 | path "*.html", emit: report 19 | path "log", emit: log 20 | 21 | script: 22 | """ 23 | ln -f \"${projectDir}/../REPORTS/SUMMARY/summary.Rmd\" .; 24 | touch log; 25 | Rscript --vanilla -e \"rmarkdown::render('summary.Rmd', params=list(root='.'), output_file='SUMMARY.html', quiet=TRUE)\" 2> log 26 | """ 27 | } 28 | 29 | 30 | workflow SUMMARY{ 31 | take: collection 32 | 33 | main: 34 | 35 | png_ch = Channel.fromPath("${projectDir}/../D{E,EU,AS,TU}/**/Figures/*.{png,pdf}") 36 | tab_ch = 
Channel.fromPath("${projectDir}/../D{E,EU,AS,TU}/**/Tables/*.tsv.gz") 37 | //png_ch.subscribe { println "PNG: $it" } 38 | //tab_ch.subscribe { println "TABLE: $it" } 39 | 40 | make_rmd(png_ch.collect(), tab_ch.collect()) 41 | 42 | emit: 43 | rmds = make_rmd.out.report 44 | } 45 | 46 | workflow{ 47 | SUMMARY(dummy) 48 | } -------------------------------------------------------------------------------- /workflows/premultiqc.nf: -------------------------------------------------------------------------------- 1 | 2 | QCENV=get_always('QCENV') 3 | QCBIN=get_always('QCBIN') 4 | QCPARAMS = get_always('fastqc_params_MULTI') ?: '' 5 | 6 | process collect_multi{ 7 | input: 8 | path check 9 | 10 | output: 11 | path "collect.txt", emit: done 12 | 13 | script: 14 | """ 15 | echo "$check Collection successful!" > collect.txt 16 | """ 17 | } 18 | 19 | 20 | process premultiqc{ 21 | conda "$QCENV"+".yaml" 22 | cpus THREADS 23 | cache 'lenient' 24 | //validExitStatus 0,1 25 | 26 | publishDir "${workflow.workDir}/../" , mode: 'link', 27 | saveAs: {filename -> 28 | if (filename.indexOf("zip") > 0) "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 29 | else if (filename.indexOf("html") > 0) "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 30 | else null 31 | } 32 | 33 | input: 34 | path samples 35 | 36 | output: 37 | path "*.{zip,html}", emit: multiqc_results 38 | 39 | script: 40 | """ 41 | export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f --exclude picard --exclude gatk -k json -z -s 42 | """ 43 | } 44 | 45 | workflow PREMULTIQC{ 46 | take: 47 | otherqcs 48 | 49 | main: 50 | 51 | //SAMPLE CHANNELS 52 | multiqc(otherqcs.collect()) 53 | 54 | emit: 55 | mqcres = premultiqc.out.multiqc_results 56 | } 57 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | 4 | Installation 5 | ============ 6 | 7 | Install MONSDA via **conda** or **pip** 8 | ------------------------------------------- 9 | 10 | To install via **conda/mamba** in an environment named 'monsda' simply run 11 | 12 | .. code-block:: 13 | 14 | conda create -n monsda -c bioconda -c conda-forge monsda 15 | 16 | 17 | To install via **pip** you first need to create the **MONSDA** environment as found in the **envs** directory of this repository (simply clone with git clone) like so: 18 | 19 | .. code-block:: 20 | 21 | conda env create -n monsda -f MONSDA/envs/monsda.yaml 22 | 23 | 24 | The **envs** directory holds all the environments needed to run the pipelines in the **workflows** directory, these will be installed automatically alongside **MONSDA**. 25 | 26 | For that activate the **MONSDA** environment and run **pip** 27 | 28 | .. code-block:: 29 | 30 | conda activate monsda 31 | pip install MONSDA 32 | 33 | 34 | Install from source 35 | ------------------- 36 | 37 | Simply clone this repository with 38 | 39 | .. code-block:: 40 | 41 | git clone git@github.com:jfallmann/MONSDA.git 42 | 43 | You can then install dependencies as described for **pip** installation and manually run **setup.py**. 44 | 45 | Be aware that **MONSDA** is *version dependent*, so config files can only be run with the **specified** version of **MONSDA** in order to guarantee reproducibility by conserving dependencies and environments. 
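For example, to re-run a config written for a given release (the ``VERSION`` field of the config, e.g. ``1.2.8`` as in ``configs/tutorial_quick.json``), you can pin the installation to exactly that release and check it before starting a run. This is only an illustrative sketch; the version number below is taken from the tutorial config and needs to be replaced by whatever your own config file states:

.. code-block::

    # pip: install exactly the release the config file was written for
    pip install MONSDA==1.2.8
    # or with conda/mamba
    conda create -n monsda -c bioconda -c conda-forge monsda=1.2.8
    # verify that the installed package matches the VERSION field of the config
    python -c "import MONSDA; print(MONSDA.__version__)"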
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "MONSDA" 3 | description = "MONSDA, Modular Organizer of Nextflow and Snakemake driven hts Data Analysis" 4 | readme = "README.md" 5 | license = { file = "LICENSE" } 6 | authors = [{name = "Joerg Fallmann", email = "fallmann.joerg@gmail.com"}] 7 | maintainers = [{name = "Joerg Fallmann", email = "fallmann.joerg@gmail.com"}] 8 | dynamic = ["scripts", "dependencies", "requires-python", "version"] 9 | 10 | 11 | [project.urls] 12 | Homepage = "https://github.com/jfallmann/MONSDA" 13 | Documentation = "https://monsda.readthedocs.io/en/latest" 14 | Repository = "https://github.com/jfallmann/MONSDA" 15 | Issues = "https://github.com/jfallmann/MONSDA/issues" 16 | 17 | [build-system] 18 | build-backend = "setuptools.build_meta" 19 | requires = [ 20 | "setuptools>=42", 21 | 'tomli; python_version >= "3.12.0"', 22 | "biopython>=1.83", 23 | "snakemake>=8.16.0", 24 | "black>=21.5b2", 25 | "flake8>=3.8.3", 26 | "isort>=5.13.2", 27 | "sphinx>=4.1.0", 28 | "versioneer>=0.20", 29 | ] 30 | 31 | [tool.versioneer] 32 | VCS = "git" 33 | style = "pep440" 34 | tag_prefix = "v" 35 | versionfile_build = "MONSDA/_version.py" 36 | versionfile_source = "MONSDA/_version.py" 37 | 38 | [tool.codespell] 39 | # Ref: https://github.com/codespell-project/codespell#using-a-config-file 40 | skip = '.git,*.pdf,*.svg,versioneer.py,*.css,test_*' 41 | check-hidden = true 42 | ignore-regex = '^\s*"image/\S+": ".*|\b[Mm]anuel[. ][Hh]oltgrewe\b' 43 | ignore-words-list = 'testin' 44 | 45 | -------------------------------------------------------------------------------- /workflows/simulatetrim.smk: -------------------------------------------------------------------------------- 1 | if paired == 'paired' or paired == 'singlecell': 2 | rule simulate_trim: 3 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 4 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 5 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 6 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 7 | threads: 1 8 | params: filetolink = lambda w, input: "{r}".format(r=os.path.abspath(input.r1)), 9 | filetolink2 = lambda w, input: "{r}".format(r=os.path.abspath(input.r2)) 10 | shell: "ln -s {params.filetolink} {output.r1} && ln -s {params.filetolink2} {output.r2}" 11 | 12 | else: 13 | rule simulate_trim: 14 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 15 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 16 | threads: 1 17 | params: filetolink = lambda w, input: "{r}".format(r=os.path.abspath(input.r1)) 18 | shell: "ln -s {params.filetolink} {output.r1}" 19 | -------------------------------------------------------------------------------- /docs/source/runsmk.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Start a pipline run 3 | =================== 4 | 5 | 6 | Snakemake 7 | --------- 8 | 9 | Activate the 
**MONSDA** conda environment and run 10 | 11 | 12 | .. code-block:: 13 | 14 | monsda --help 15 | 16 | 17 | to see the help and available options that will be passed through to **Snakemake**. 18 | 19 | To start a job with **Snakemake**, which is the default, run 20 | 21 | .. code-block:: 22 | 23 | monsda -j NUMBER_OF_CORES -c YOUR_CONFIG.json --directory ${PWD} 24 | 25 | 26 | or add additional arguments for **Snakemake** as you see fit. 27 | **Snakemake** currently defaults to mamba as its conda frontend; please be aware that for this to work you should follow the recommendations at MAMBA_. However, with conda-libmamba-solver installed, using the conda frontend can provide an even better and more stable experience. We currently recommend setting a fixed directory to store environments (here conda_envs) and running the conda frontend. 28 | 29 | .. _MAMBA: https://mamba.readthedocs.io/en/latest/mamba-installation.html 30 | 31 | .. code-block:: 32 | 33 | --conda-frontend conda --conda-prefix path_to_conda_envs 34 | 35 | 36 | Nextflow 37 | -------- 38 | 39 | To run **MONSDA** in **Nextflow** mode, just add '--nextflow' 40 | 41 | .. code-block:: 42 | 43 | monsda --nextflow -j NUMBER_OF_CORES -c YOUR_CONFIG.json --directory ${PWD} 44 | 45 | 46 | As with **Snakemake**, additional arguments for **Nextflow** can be added and will be passed through. 47 | -------------------------------------------------------------------------------- /workflows/dorado.nf: -------------------------------------------------------------------------------- 1 | CALLERENV = get_always('BASECALLENV') 2 | CALLERBIN = get_always('BASECALLBIN') 3 | 4 | CALLERPARAMS = get_always('dorado_params_CALLER') ?: '' 5 | MODELPARAMS = get_always('dorado_params_MODEL') ?: '' 6 | 7 | //CALLERS PROCESSES 8 | 9 | process dorado{ 10 | conda "$CALLERENV"+".yaml" 11 | cpus THREADS 12 | cache 'lenient' 13 | //validExitStatus 0,1 14 | 15 | publishDir "${workflow.workDir}/../" , mode: 'link', 16 | saveAs: {filename -> 17 | if (filename.indexOf(".fastq.gz") > 0) "FASTQ/${CONDITION}/${file(filename).getName()}" 18 | else if (filename.indexOf(".log") > 0) "LOGS/BASECALL/${CONDITION}/${file(filename).getName()}" 19 | } 20 | 21 | input: 22 | path f5 23 | 24 | output: 25 | path "*.fastq.gz", emit: fastq 26 | path "*.log", emit: log 27 | 28 | script: 29 | fn = file(f5).getSimpleName() 30 | oc = fn+".fastq.gz" 31 | ol = fn+".log" 32 | sortmem = '30%' 33 | 34 | """ 35 | $CALLERBIN download --model $MODELPARAMS &> $ol && $CALLERBIN basecaller $CALLERPARAMS $MODELPARAMS .
2>> $ol 1> tmp.bam && samtools view -h tmp.bam|samtools fastq -n - | pigz 1> $oc 2>> $ol && rm -rf tmp.bam 36 | """ 37 | } 38 | 39 | workflow BASECALL{ 40 | take: collection 41 | 42 | main: 43 | 44 | P5SAMPLES = SAMPLES.collect{ 45 | element -> return "${workflow.workDir}/../RAW/"+element+"*.pod5" 46 | } 47 | 48 | p5samples_ch = Channel.fromPath(P5SAMPLES.sort()) 49 | 50 | dorado(p5samples_ch.collate(1)) 51 | 52 | emit: 53 | fastq = dorado.out.fastq 54 | logs = dorado.out.log 55 | } -------------------------------------------------------------------------------- /workflows/premultiqc.smk: -------------------------------------------------------------------------------- 1 | rule qcall: 2 | input: expand("QC/Multi/{condition}/multiqc_report.html", condition=str.join(os.sep, conditiononly(SAMPLES[0], config))) 3 | 4 | if paired == 'paired': 5 | rule multiqc: 6 | input: expand("QC/{rawfile}_{read}_fastqc.zip", rawfile=list(SAMPLES), read=['R1','R2']), 7 | output: html = report("QC/Multi/{condition}/multiqc_report.html", category="QC"), 8 | tmp = temp("QC/Multi/{condition}/tmp"), 9 | lst = "QC/Multi/{condition}/qclist.txt" 10 | log: "LOGS/{condition}/multiqc.log" 11 | conda: "qc.yaml" 12 | threads: 1 13 | shell: "OUT=$(dirname {output.html}); for i in {input};do echo $(dirname \"${{i}}\") >> {output.tmp};done; cat {output.tmp} |sort -u > {output.lst};export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f --exclude picard --exclude gatk -k json -z -s -o $OUT -l {output.lst} 2> {log}" 14 | 15 | else: 16 | rule multiqc: 17 | input: expand("QC/{rawfile}_fastqc.zip", rawfile=list(SAMPLES)), 18 | output: html = report("QC/Multi/{condition}/multiqc_report.html", category="QC"), 19 | tmp = temp("QC/Multi/{condition}/tmp"), 20 | lst = "QC/Multi/{condition}/qclist.txt" 21 | log: "LOGS/{condition}/multiqc.log" 22 | conda: "qc.yaml" 23 | threads: 1 24 | shell: "OUT=$(dirname {output.html}); for i in {input};do echo $(dirname \"${{i}}\") >> {output.tmp};done; cat {output.tmp} |sort -u > {output.lst};export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f --exclude picard --exclude gatk -k json -z -s -o $OUT -l {output.lst} 2> {log}" 25 | -------------------------------------------------------------------------------- /scripts/Shells/MergeGeneExpression_Cufflinks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/env bash 2 | COND=$1 3 | SOURCE=$2 4 | PATTERN=$3 5 | SAMPLES=$4 6 | 7 | #cut -d$'\t' -f1 L12270_09_WT_d4_1_2_IN_R1_mapped_sorted.transcript_counts > tmp_counts 8 | 9 | for i in *$COND\_*$SOURCE\_*$PATTERN\_unique/genes.fpkm_tracking;do 10 | echo $i 11 | tail -n+2 $i|cut -d$'\t' -f1,5,10 >> GENECOUNTS_$COND\_$SOURCE\_tmp 12 | done 13 | 14 | #env sa=$SAMPLES perl -lan -F'\t' -e 'BEGIN{%exp=()}; $exp{$F[0]}+=$F[1]; END{foreach $key(keys %exp){print $key,"\t",$exp{$key}/$ENV{sa}}}' COUNTS_$COND\_$SOURCE\_tmp |sort -k1,1d > COUNTS_$COND\_$SOURCE.transcript.fpkm 15 | 16 | #rm -f COUNTS_$COND\_$SOURCE\_tmp 17 | 18 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sorted_unique.counts) > Compare_D_L_uni 19 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sortedall) > Compare_D_L 20 | 21 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d4 IP R1 3 22 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d0 IP R1 3 23 | #bash 
../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d4 IP R1 3 24 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d0 IP R1 3 25 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d0 IN R1 3 26 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d4 IN R1 3 27 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d4 IN R1 3 28 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d0 IN R1 3 29 | -------------------------------------------------------------------------------- /scripts/lib/_lib.R: -------------------------------------------------------------------------------- 1 | ## FUNCS 2 | get_gene_name <- function(id, df) { 3 | if (!"gene_id" %in% colnames(df)) { 4 | message("WARNING: gene_id not found as colname, will be replaced by first match of colname with ID") 5 | colnames(df)[grepl("id$", names(df), ignore.case = TRUE)][1] <- "gene_id" 6 | } 7 | if (!"gene_name" %in% colnames(df)) { 8 | message("WARNING: gene_name not found as colname, will be replaced by gene column, please make sure the gtf file is in the correct format") 9 | df$gene_name <- df$gene 10 | } 11 | name_list <- df$gene_name[df["type"] == "gene" & df["gene_id"] == id] 12 | if (length(unique(name_list)) == 1) { 13 | return(name_list[1]) 14 | } else { 15 | message(paste("WARNING: ambigous gene id: ", id)) 16 | return(paste(unique(name_list), sep = "|")) 17 | } 18 | } 19 | 20 | 21 | get_exon_name <- function(id, df) { 22 | if (!"gene_id" %in% colnames(df)) { 23 | message("WARNING: gene_id not found as colname, will be replaced by first match of colname with ID") 24 | colnames(df)[grepl("id$", names(df), ignore.case = TRUE)][1] <- "gene_id" 25 | } 26 | if (!"gene_name" %in% colnames(df)) { 27 | message("WARNING: gene_name not found as colname, will be replaced by gene column, please make sure the gtf file is in the correct format") 28 | df$gene_name <- df$gene 29 | } 30 | name_list <- df$gene_name[df["type"] == "exon" & df["gene_id"] == id] 31 | if (length(unique(name_list)) == 1) { 32 | return(name_list[1]) 33 | } else { 34 | message(paste("WARNING: ambigous gene id: ", id)) 35 | return(paste(unique(name_list), sep = "|")) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /envs/edger_DAS.yaml: -------------------------------------------------------------------------------- 1 | name: edger_DAS 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - bioconductor-biobase =2.46.0 8 | - bioconductor-biocgenerics =0.32.0 9 | - bioconductor-biocparallel =1.20.0 10 | - bioconductor-biostrings =2.54.0 11 | - bioconductor-delayedarray =0.12.0 12 | - bioconductor-edger =3.28.0 13 | - bioconductor-genomeinfodb =1.22.0 14 | - bioconductor-genomeinfodbdata =1.2.2 15 | - bioconductor-genomicalignments =1.22.0 16 | - bioconductor-genomicranges =1.38.0 17 | - bioconductor-iranges =2.20.0 18 | - bioconductor-limma =3.42.0 19 | - bioconductor-rhtslib =1.18.0 20 | - bioconductor-rsamtools =2.2.0 21 | - bioconductor-rtracklayer =1.46.0 22 | - bioconductor-s4vectors =0.24.0 23 | - bioconductor-summarizedexperiment =1.16.0 24 | - bioconductor-xvector =0.26.0 25 | - bioconductor-zlibbioc =1.32.0 26 | - r-assertthat =0.2.1 27 | - r-base =3.6.2 28 | - r-bh =1.69.0_1 29 | - r-bitops =1.0_6 30 | - r-cli =1.1.0 31 | - r-crayon =1.3.4 32 | - r-dplyr =0.8.0.1 33 | - r-fansi =0.4.0 34 | - r-formatr =1.7 35 | - r-futile.logger =1.4.3 36 | - 
r-futile.options =1.0.1 37 | - r-glue =1.3.1 38 | - r-lambda.r =1.2.4 39 | - r-lattice =0.20_38 40 | - r-locfit =1.5_9.1 41 | - r-magrittr =1.5 42 | - r-matrix =1.2_18 43 | - r-matrixstats =0.57.0 44 | - r-pillar =1.3.1 45 | - r-pkgconfig =2.0.2 46 | - r-plogr =0.2.0 47 | - r-purrr =0.3.2 48 | - r-r6 =2.4.0 49 | - r-rcpp =1.0.3 50 | - r-rcurl =1.98_1.1 51 | - r-rlang =0.3.4 52 | - r-snow =0.4_3 53 | - r-statmod =1.4.33 54 | - r-tibble =2.1.1 55 | - r-tidyselect =0.2.5 56 | - r-utf8 =1.1.4 57 | - r-xml =3.99_0.3 58 | - readline =8.2 59 | - sed =4.7 -------------------------------------------------------------------------------- /envs/edger_DEU.yaml: -------------------------------------------------------------------------------- 1 | name: edger_DEU 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - bioconductor-biobase =2.46.0 8 | - bioconductor-biocgenerics =0.32.0 9 | - bioconductor-biocparallel =1.20.0 10 | - bioconductor-biostrings =2.54.0 11 | - bioconductor-delayedarray =0.12.0 12 | - bioconductor-edger =3.28.0 13 | - bioconductor-genomeinfodb =1.22.0 14 | - bioconductor-genomeinfodbdata =1.2.2 15 | - bioconductor-genomicalignments =1.22.0 16 | - bioconductor-genomicranges =1.38.0 17 | - bioconductor-iranges =2.20.0 18 | - bioconductor-limma =3.42.0 19 | - bioconductor-rhtslib =1.18.0 20 | - bioconductor-rsamtools =2.2.0 21 | - bioconductor-rtracklayer =1.46.0 22 | - bioconductor-s4vectors =0.24.0 23 | - bioconductor-summarizedexperiment =1.16.0 24 | - bioconductor-xvector =0.26.0 25 | - bioconductor-zlibbioc =1.32.0 26 | - r-assertthat =0.2.1 27 | - r-base =3.6.2 28 | - r-bh =1.69.0_1 29 | - r-bitops =1.0_6 30 | - r-cli =1.1.0 31 | - r-crayon =1.3.4 32 | - r-dplyr =0.8.0.1 33 | - r-fansi =0.4.0 34 | - r-formatr =1.7 35 | - r-futile.logger =1.4.3 36 | - r-futile.options =1.0.1 37 | - r-glue =1.3.1 38 | - r-lambda.r =1.2.4 39 | - r-lattice =0.20_38 40 | - r-locfit =1.5_9.1 41 | - r-magrittr =1.5 42 | - r-matrix =1.2_18 43 | - r-matrixstats =0.57.0 44 | - r-pillar =1.3.1 45 | - r-pkgconfig =2.0.2 46 | - r-plogr =0.2.0 47 | - r-purrr =0.3.2 48 | - r-r6 =2.4.0 49 | - r-rcpp =1.0.3 50 | - r-rcurl =1.98_1.1 51 | - r-rlang =0.3.4 52 | - r-snow =0.4_3 53 | - r-statmod =1.4.33 54 | - r-tibble =2.1.1 55 | - r-tidyselect =0.2.5 56 | - r-utf8 =1.1.4 57 | - r-xml =3.99_0.3 58 | - readline =8.2 59 | - sed =4.7 -------------------------------------------------------------------------------- /workflows/guppy.smk: -------------------------------------------------------------------------------- 1 | CALLERBIN, CALLERENV = env_bin_from_config(config,'BASECALL') 2 | 3 | wildcard_constraints: 4 | rawfile = '|'.join(SAMPLES) 5 | 6 | rule themall: 7 | input: expand("FASTQ/{rawfile}.fastq.gz", rawfile = SAMPLES) 8 | 9 | rule call_base: 10 | input: f5 = "RAW/{rawfile}.fast5" 11 | output: fq = "FASTQ/{rawfile}.fastq.gz", 12 | summary = "FASTQ/{rawfile}_summary.txt", 13 | telemetry = "FASTQ/{rawfile}_telemetry.js" 14 | log: "LOGS/BASECALL/{rawfile}_guppy.log" 15 | conda: ""+CALLERENV+".yaml" 16 | threads: MAXTHREAD 17 | params: caller = CALLERBIN, 18 | cpara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'BASECALL', CALLERENV)['OPTIONS'].get('BASECALL', ""), 19 | cmodel = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'BASECALL', CALLERENV)['OPTIONS'].get('MODEL', ""), 20 | f5dir = lambda wildcards, input: os.path.dirname(input.f5), 21 | f5file = lambda wildcards, input: os.path.basename(input.f5), 22 | fqdir = lambda wildcards, output: 
os.path.dirname(output.fq) 23 | shell: " echo \"{params.f5file}\" > {params.f5dir}/f5list && {params.caller} {params.cpara} -c {params.cmodel} --compress_fastq -i {params.f5dir} --input_file_list {params.f5dir}/f5list -s {params.f5dir}/BASECALL 2> {log} && cat {params.f5dir}/BASECALL/pass/fastq_runid_*.fastq.gz > {output.fq} && rm -f {params.f5dir}/BASECALL/pass/fastq_runid_*.fastq.gz && cat {params.f5dir}/BASECALL/*.log >> {log} && rm -f {params.f5dir}/BASECALL/*.log && mv -f {params.f5dir}/BASECALL/sequencing_summary.txt {output.summary} && mv -f {params.f5dir}/BASECALL/sequencing_telemetry.js {output.telemetry} && rm -f {params.f5dir}/f5list" -------------------------------------------------------------------------------- /scripts/Shells/MergeExpression_RNAcounter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/env bash 2 | COND=$1 3 | SOURCE=$2 4 | PATTERN=$3 5 | SAMPLES=$4 6 | 7 | cut -d$'\t' -f1 L12270_09_WT_d4_1_2_IN_R1_mapped_sorted.transcript_counts > tmp_counts 8 | 9 | for i in *$COND\_*$SOURCE\_*$PATTERN\.transcript_counts;do 10 | echo $i 11 | join -1 1 -2 1 <(sort -k1,1d tmp_counts) <(sort -k1,1d $i|cut -d$'\t' -f1,2,3) > COUNTS_$COND\_$SOURCE && cp -f COUNTS_$COND\_$SOURCE tmp_counts 12 | done 13 | rm -f tmp_counts 14 | 15 | awk 'BEGIN{FS=" ";OFS=FS}{s=1;t=0;for(i=2;i<=NF;i+=2){if($i >=1){t+=$i;s++}} print $1,t/s;t=0;s=1}' COUNTS_$COND\_$SOURCE |sort -k1,1d > $SOURCE\_$COND\_$PATTERN\.transcript.counts 16 | 17 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sorted_unique.counts) > Compare_D_L_uni 18 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sortedall) > Compare_D_L 19 | 20 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh KO_d4 IP mapped_sorted 3 21 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh KO_d0 IP mapped_sorted 3 22 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh WT_d4 IP mapped_sorted 3 23 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh WT_d0 IP mapped_sorted 3 24 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh WT_d0 IN mapped_sorted 3 25 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh WT_d4 IN mapped_sorted 3 26 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh KO_d4 IN mapped_sorted 3 27 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh KO_d0 IN mapped_sorted 3 28 | -------------------------------------------------------------------------------- /envs/perl.yaml: -------------------------------------------------------------------------------- 1 | name: perl 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - grep >=2.14 9 | - perl >=5.26.2 10 | - perl-app-cpanminus >=1.7044 11 | - perl-autoloader >=5.74 12 | - perl-bioperl >=1.6.924 13 | - perl-capture-tiny >=0.48 14 | - perl-carp >=1.38 15 | - perl-constant >=1.33 16 | - perl-data-dumper >=2.173 17 | - perl-dbi >=1.642 18 | - perl-dynaloader >=1.25 19 | - perl-encode >=2.88 20 | - perl-exporter >=5.72 21 | - perl-extutils-cppguess >=0.12 22 | - perl-extutils-makemaker >=7.36 23 | - perl-file-find >=1.27 24 | - perl-file-path >=2.16 25 | - perl-file-slurp >=9999.27 26 | - perl-file-spec >=3.48_01 27 | - perl-file-temp >=0.2304 28 | - perl-findbin-real >=1.05 29 | - perl-getopt-long >=2.50 30 | - 
perl-ipc-cmd >=1.02 31 | - perl-list-util >=1.38 32 | - perl-locale-maketext-simple >=0.21 33 | - perl-math-cdf >=0.1 34 | - perl-math-round >=0.07 35 | - perl-module-corelist >=5.20190524 36 | - perl-module-load >=0.32 37 | - perl-module-load-conditional >=0.68 38 | - perl-module-metadata >=1.000036 39 | - perl-params-check >=0.38 40 | - perl-parent >=0.236 41 | - perl-path-class >=0.37 42 | - perl-perl-ostype >=1.010 43 | - perl-perlio-gzip >=0.20 44 | - perl-pod-escapes >=1.07 45 | - perl-pod-simple >=3.35 46 | - perl-pod-usage >=1.69 47 | - perl-posix >=1.38_03 48 | - perl-set-intervaltree >=0.12 49 | - perl-socket >=2.027 50 | - perl-symbol >=1.07 51 | - perl-test >=1.26 52 | - perl-test-harness >=3.42 53 | - perl-threaded >=5.26.0 54 | - perl-tie-hash >=1.05 55 | - perl-tie-hash-indexed >=0.05 56 | - perl-time-hires >=1.9760 57 | - perl-version >=0.9924 58 | - perl-xsloader >=0.24 59 | - perl-yaml >=1.29 60 | - pigz >=2.6 61 | 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #temps 2 | *~ 3 | \#* 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | #lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /scripts/Shells/UniqueSam_woPicard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | in=$1 4 | out=$2 5 | threads=$3 6 | specialmappers="${4:-}" 7 | 8 | if [[ "$1" == *.gz* ]] 9 | then 10 | samtools view -H <(zcat $in) | grep '@HD' | pigz -p $threads -f > $out 11 | samtools view -H <(zcat $in) | grep '@SQ' | sort -t$'\t' -k1,1 -k2,2V | pigz -p $threads -f >> $out 12 | samtools view -H <(zcat $in) | grep '@RG' | pigz -p $threads -f >> $out 13 | samtools view -H <(zcat $in) | grep '@PG' | pigz -p $threads -f >> $out 14 | else 15 | samtools view -H 
<(cat $in)|grep '@HD' | pigz -p $threads -f > $out 16 | samtools view -H <(cat $in)|grep '@SQ' | sort -t$'\t' -k1,1 -k2,2V |pigz -p $threads -f >> $out 17 | samtools view -H <(cat $in)|grep '@RG' | pigz -p $threads -f >> $out 18 | samtools view -H <(cat $in)|grep '@PG' | pigz -p $threads -f >> $out 19 | fi 20 | 21 | if [[ "$1" == *bwa* ]] || [[ "$specialmappers" == *bwa* ]] 22 | then 23 | if [[ "$1" == *.gz* ]] 24 | then 25 | zcat $in | grep -v "^@"| grep -v -e $'\t''XA:Z:' -e $'\t''SA:Z:' | pigz -p $threads -f >> $out 26 | else 27 | cat $in | grep -v "^@"| grep -v -e $'\t''XA:Z:' -e $'\t''SA:Z:' | pigz -p $threads -f >> $out 28 | fi 29 | elif [[ "$1" == *minimap* ]] || [[ "$specialmappers" == *minimap* ]] 30 | then 31 | if [[ "$1" == *.gz* ]] 32 | then 33 | zcat $in | grep -v "^@"| perl -wlane 'print if $F[4] >=60'| pigz -p $threads -f >> $out 34 | else 35 | cat $in | grep -v "^@"| perl -wlane 'print if $F[4] >=60' | pigz -p $threads -f >> $out 36 | fi 37 | else 38 | if [[ "$1" == *.gz* ]] 39 | then 40 | zcat $in | grep -v "^@" | grep -w -P "NH:i:1|tp:A:P" | pigz -p $threads -f >> $out 41 | else 42 | cat $in | grep -v "^@" | grep -w -P "NH:i:1|tp:A:P" | pigz -p $threads -f >> $out 43 | fi 44 | fi 45 | -------------------------------------------------------------------------------- /workflows/header.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | // includes 4 | // include {} from "../lib/Collection.groovy" 5 | 6 | // NOTE 7 | // ALWAYS COMMENT LINES WITH '//', DO NOT USE MULTI LINE COMMENTS AS THE PARSER WILL NOT IGNORE MIDDLE LINES AND THIS WILL CAUSE CHAOS 8 | 9 | //Version Check 10 | nextflowVersion = '>=20.01.0.5264' 11 | nextflow.enable.dsl=2 12 | 13 | //define unset Params 14 | def get_always(parameter){ 15 | if (!params.containsKey(parameter)){ 16 | params.put(parameter, null) 17 | } 18 | return params[parameter] 19 | } 20 | 21 | //Params from CL 22 | REFERENCE = "${workflow.workDir}/../"+get_always('REFERENCE') 23 | REFDIR = "${workflow.workDir}/../"+get_always('REFDIR') 24 | BINS = get_always('BINS') 25 | THREADS = get_always('MAXTHREAD') 26 | PAIRED = get_always('PAIRED') ?: null 27 | RUNDEDUP = get_always('RUNDEDUP') ?: null 28 | PREDEDUP = get_always('PREDEDUP') ?: null 29 | STRANDED = get_always('STRANDED') ?: null 30 | IP = get_always('IP') ?: null 31 | CONDITION = get_always('CONDITION') ?: null 32 | COMBO = get_always('COMBO') ?: '' 33 | SCOMBO = get_always('SCOMBO') ?: '' 34 | SAMPLES = get_always('SAMPLES').split(',') ?: null 35 | LONGSAMPLES = get_always('LONGSAMPLES').split(',') ?: null 36 | SHORTSAMPLES = get_always('SHORTSAMPLES').split(',') ?: null 37 | SETS = get_always('SETS') ?: null 38 | //dummy 39 | dummy = Channel.fromPath("${workflow.workDir}/../LOGS/MONSDA.log") 40 | 41 | //SAMPLE CHANNELS 42 | if (PAIRED == 'paired' || PAIRED == 'singlecell'){ 43 | RSAMPLES = SAMPLES.collect{ 44 | element -> return "${workflow.workDir}/../FASTQ/"+element+"_{R2,R1}.*fastq.gz" 45 | } 46 | }else{ 47 | RSAMPLES=SAMPLES.collect{ 48 | element -> return "${workflow.workDir}/../FASTQ/"+element+".*fastq.gz" 49 | } 50 | } 51 | 52 | samples_ch = Channel.fromPath(RSAMPLES) -------------------------------------------------------------------------------- /scripts/Shells/MergeExpression_Cufflinks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/env bash 2 | COND=$1 3 | SOURCE=$2 4 | PATTERN=$3 5 | SAMPLES=$4 6 | 7 | #cut -d$'\t' -f1 
L12270_09_WT_d4_1_2_IN_R1_mapped_sorted.transcript_counts > tmp_counts 8 | 9 | for i in *$COND\_*$SOURCE\_*$PATTERN\/transcripts.gtf;do 10 | echo $i 11 | cut -d$'\t' -f9 $i|perl -lan -F'; ' -e 'BEGIN{$trans;%exp=()};for(0..$#F){($line=$F[$_])=~s/\"//g;$line=~s/^\s//g;@tmp=split(/\s/,$line);if($tmp[0] eq "transcript_id"){$trans=$tmp[1]} if($tmp[0] eq "FPKM"){$exp{$trans}=$tmp[1]}}END{foreach $key(keys %exp){print $key,"\t",$exp{$key}}}' - >> COUNTS_$COND\_$SOURCE\_tmp 12 | done 13 | 14 | env sa=$SAMPLES perl -lan -F'\t' -e 'BEGIN{%exp=()}; $exp{$F[0]}+=$F[1]; END{foreach $key(keys %exp){print $key,"\t",$exp{$key}/$ENV{sa}}}' COUNTS_$COND\_$SOURCE\_tmp |sort -k1,1d > COUNTS_$COND\_$SOURCE.transcript.fpkm 15 | 16 | #rm -f COUNTS_$COND\_$SOURCE\_tmp 17 | 18 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sorted_unique.counts) > Compare_D_L_uni 19 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sortedall) > Compare_D_L 20 | 21 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d4 IP R1 3 22 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d0 IP R1 3 23 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d4 IP R1 3 24 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d0 IP R1 3 25 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d0 IN R1 3 26 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d4 IN R1 3 27 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d4 IN R1 3 28 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d0 IN R1 3 29 | -------------------------------------------------------------------------------- /workflows/ciri2.smk: -------------------------------------------------------------------------------- 1 | CBIN, CENV = env_bin_from_config(config, 'CIRCS') 2 | 3 | if not 'bwa' in combo or not 'bwa' in scombo: 4 | log.warning('Ciri2 needs BWA input, can only be used with BWA in mapping step') 5 | 6 | if not rundedup: 7 | rule themall: 8 | input: expand("CIRCS/{combo}/{file}_circs", combo=combo, file=samplecond(SAMPLES, config)) 9 | else: 10 | rule themall: 11 | input: expand("CIRCS/{combo}/{file}_{type}", combo=combo, file=samplecond(SAMPLES, config), type=['sorted', 'sorted_dedup']) 12 | 13 | rule FindCircs: 14 | input: sam = expand("MAPPED/{scombo}/{{file}}_mapped_sorted.sam.gz", scombo=scombo), 15 | ref = REFERENCE, 16 | anno = ANNOTATION 17 | output: circs = "CIRCS/{combo}/{file}_circs", 18 | tmp = temp(directory("CIRCS/{combo}/{file}_tmp")), 19 | ts = temp("CIRCS/{combo}/{file}_tmp.sam"), 20 | ta = temp("CIRCS/{combo}/{file}_tmp.gtf"), 21 | tf = temp("CIRCS/{combo}/{file}_tmp.fa") 22 | log: "LOGS/CIRCS/{combo}/{file}_ciri2.log" 23 | conda: ""+CENV+".yaml" 24 | threads: MAXTHREAD 25 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, "CIRCS", CENV)['OPTIONS'].get('CIRC', ""), 26 | circ = CBIN 27 | shell: "set +o pipefail; export LC_ALL=C; if [[ -n \"$(zcat {input.sam} | head -c 1 | tr \'\\0\\n\' __)\" ]] ;then mkdir -p {output.tmp} && zcat {input.sam}|samtools sort -n -@ {threads} -u -O sam -T {output.tmp} > {output.ts} && zcat {input.anno} > {output.ta} && zcat {input.ref} > {output.tf} && perl {params.circ} -I {output.ts} -O {output.circs} -F {output.tf} -T {threads} -A {output.ta} -G {log} {params.cpara} &>> {log}; else gzip < 
/dev/null > {output.circs}; echo \"File {input.sam} empty\" >> {log}; fi; touch CIRIerror.log && cat CIRIerror.log >> {log} && echo '' > CIRIerror.log && touch {output.circs} && mkdir -p {output.tmp}" 28 | -------------------------------------------------------------------------------- /workflows/umitools_dedup.nf: -------------------------------------------------------------------------------- 1 | DEDUPENV=get_always('DEDUPENV') 2 | DEDUPBIN=get_always('DEDUPBIN') 3 | 4 | DEDUPPARAMS = get_always('umitools_params_DEDUP') ?: '' 5 | 6 | process dedup_bam{ 7 | conda "$DEDUPENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.endsWith("_dedup.bam")) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 15 | else if (filename.indexOf("_dedup.bam.bai") > 0) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 16 | else if (filename.indexOf("dedup.log") > 0) "LOGS/${COMBO}/${CONDITION}/DEDUP/${file(filename).getName()}" 17 | else null 18 | } 19 | 20 | input: 21 | path todedup 22 | path bami 23 | 24 | output: 25 | path "*_dedup.bam", emit: bam 26 | path "*_dedup.bam.bai", emit: bai 27 | path "*_dedup.log", emit: logs 28 | 29 | script: 30 | bams = todedup[0] 31 | bais = todedup[1] 32 | outf = bams.getSimpleName()+"_dedup.bam" 33 | outl = bams.getSimpleName()+"_dedup.log" 34 | if (PAIRED == 'paired'){ 35 | """ 36 | mkdir tmp && $DEDUPBIN dedup $DEDUPPARAMS --temp-dir tmp --log=$outl --paired --stdin=$bams --stdout=$outf && samtools index $outf >> $outl 37 | """ 38 | } 39 | else{ 40 | """ 41 | mkdir tmp && $DEDUPBIN dedup $DEDUPPARAMS --temp-dir tmp --log=$outl --stdin=$bams --stdout=$outf && samtools index $outf >> $outl 42 | """ 43 | } 44 | } 45 | 46 | workflow DEDUPBAM{ 47 | take: 48 | map 49 | mapi 50 | mapu 51 | mapui 52 | 53 | main: 54 | dedup_bam(map.concat(mapu), mapi.concat(mapui)) 55 | 56 | emit: 57 | dedup = dedup_bam.out.bam 58 | dedupbai = dedup_bam.out.bai 59 | deduplog = dedup_bam.out.logs 60 | } 61 | 62 | 63 | -------------------------------------------------------------------------------- /workflows/simulatetrim.nf: -------------------------------------------------------------------------------- 1 | T1SAMPLES = null 2 | T2SAMPLES = null 3 | 4 | process trim{ 5 | //conda "$TOOLENV"+".yaml" 6 | cpus THREADS 7 | cache 'lenient' 8 | //validExitStatus 0,1 9 | 10 | publishDir "${workflow.workDir}/../" , mode: 'copy', 11 | saveAs: {filename -> 12 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.fastq.gz" 13 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/Trimming_report.txt" 14 | else null 15 | } 16 | 17 | input: 18 | path reads 19 | 20 | output: 21 | path "*trimmed.fastq.gz" , emit: trim 22 | path "Trimming_report.txt", emit: rep 23 | 24 | script: 25 | if (PAIRED == 'paired' || PAIRED == 'singlecell'){ 26 | r1 = reads[0] 27 | r2 = reads[1] 28 | a="Trimming_report.txt" 29 | b=file(r1).getName().replace(".fastq.gz", "_trimmed.fastq.gz") 30 | c=file(r2).getName().replace(".fastq.gz", "_trimmed.fastq.gz") 31 | """ 32 | ln -sf $r1 $b ; ln -sf $r2 $c; echo "simulated $r1 $r2 trimming" > $a 33 | """ 34 | }else{ 35 | a="Trimming_report.txt" 36 | b=file(reads).getName().replace(".fastq.gz", "_trimmed.fastq.gz") 37 | """ 38 | ln -sf $reads $b ; echo "simulated $reads trimming" > $a 39 | """ 40 | } 41 | } 42 | 43 | workflow TRIMMING{ 44 | take: 45 | 
collection 46 | 47 | main: 48 | 49 | if ( PREDEDUP == 'enabled' ){ 50 | trim(collection) 51 | } else if ( collection.toList().contains('MONSDA.log') || collection.isEmpty()){ 52 | if (PAIRED == 'paired' || PAIRED == 'singlecell'){ 53 | trim(samples_ch.collate(2)) 54 | } 55 | else{ 56 | trim(samples_ch.collate(1)) 57 | } 58 | } else{ 59 | trim(collection) 60 | } 61 | 62 | emit: 63 | trimmed = trim.out.trim 64 | report = trim.out.rep 65 | } -------------------------------------------------------------------------------- /workflows/guppy.nf: -------------------------------------------------------------------------------- 1 | CALLERENV = get_always('BASECALLENV') 2 | CALLERBIN = get_always('BASECALLBIN') 3 | 4 | CALLERPARAMS = get_always('guppy_params_BASECALL') ?: '' 5 | MODELPARAMS = get_always('guppy_params_MODEL') ?: '' 6 | 7 | //CALLERS PROCESSES 8 | 9 | process guppy{ 10 | conda "$CALLERENV"+".yaml" 11 | cpus THREADS 12 | cache 'lenient' 13 | //validExitStatus 0,1 14 | 15 | publishDir "${workflow.workDir}/../" , mode: 'link', 16 | saveAs: {filename -> 17 | if (filename.indexOf(".fastq.gz") > 0) "FASTQ/${CONDITION}/${file(filename).getName()}" 18 | else if (filename.indexOf("_summary.txt") > 0) "FASTQ/${CONDITION}/${file(filename).getName()}" 19 | else if (filename.indexOf("_telemetry.js") > 0) "FASTQ/${CONDITION}/${file(filename).getName()}" 20 | else if (filename.indexOf(".log") > 0) "LOGS/BASECALL/${CONDITION}/${file(filename).getName()}" 21 | } 22 | 23 | input: 24 | path f5 25 | 26 | output: 27 | path "*.fastq.gz", emit: fastq 28 | path "*_telemetry.js", emit: telemetry 29 | path "*_summary.txt", emit: summary 30 | path "*.log", emit: log 31 | 32 | script: 33 | fn = file(f5).getSimpleName() 34 | oc = fn+".fastq.gz" 35 | ol = fn+".log" 36 | sortmem = '30%' 37 | 38 | """ 39 | mkdir -p TMP; echo \"${f5}\" > f5list && $CALLERBIN $CALLERPARAMS -c $MODELPARAMS --compress_fastq -i . --input_file_list f5list -s TMP 2> $ol && cat TMP/pass/fastq_runid_*.fastq.gz > $oc && cat TMP/*.log >> $ol && mv -f TMP/sequencing_summary.txt . && mv -f TMP/sequencing_telemetry.js .
&& rm -rf TMP 40 | """ 41 | } 42 | 43 | workflow BASECALL{ 44 | take: collection 45 | 46 | main: 47 | 48 | F5SAMPLES = SAMPLES.collect{ 49 | element -> return "${workflow.workDir}/../RAW/"+element+"*.fast5" 50 | } 51 | 52 | f5samples_ch = Channel.fromPath(F5SAMPLES.sort()) 53 | 54 | guppy(f5samples_ch.collate(1)) 55 | 56 | emit: 57 | fastq = guppy.out.fastq 58 | logs = guppy.out.log 59 | } -------------------------------------------------------------------------------- /workflows/picard_dedup.nf: -------------------------------------------------------------------------------- 1 | DEDUPENV=get_always('DEDUPENV') 2 | DEDUPBIN=get_always('DEDUPBIN') 3 | DEDUPPARAMS = get_always('picard_params_DEDUP') ?: '' 4 | JAVAPARAMS = get_always('picard_params_JAVA') ?: '' 5 | 6 | process dedup_bam{ 7 | conda "$DEDUPENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.endsWith("_dedup.bam")) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 15 | else if (filename.indexOf("_dedup.bam.bai") > 0) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 16 | else if (filename.indexOf("dedup.log") > 0) "LOGS/${COMBO}/${CONDITION}/DEDUP/${file(filename).getName()}" 17 | else if (filename.indexOf("metrix.txt") > 0) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 18 | else null 19 | } 20 | 21 | input: 22 | path todedup 23 | path bami 24 | 25 | output: 26 | path "*_dedup.bam", emit: bam 27 | path "*_dedup.bam.bai", emit: bai 28 | path "*_dedup.log", emit: logs 29 | path "*_dedup_metrix.txt", emit: metrics 30 | 31 | script: 32 | bams = todedup[0] 33 | bais = todedup[1] 34 | outf = bams.getSimpleName()+"_dedup.bam" 35 | outl = bams.getSimpleName()+"_dedup.log" 36 | outm = bams.getSimpleName()+"_dedup_metrix.txt" 37 | """ 38 | mkdir -p TMP && $DEDUPBIN $JAVAPARAMS MarkDuplicates --REMOVE_DUPLICATES true --ASSUME_SORT_ORDER coordinate --TMP_DIR TMP --INPUT $bams --OUTPUT $outf --METRICS_FILE $outm $DEDUPPARAMS &> $outl && samtools index $outf &>> $outl 39 | """ 40 | } 41 | 42 | workflow DEDUPBAM{ 43 | take: 44 | map 45 | mapi 46 | mapu 47 | mapui 48 | 49 | main: 50 | //dedup_bam(collection) 51 | dedup_bam(map.concat(mapu), mapi.concat(mapui)) 52 | 53 | emit: 54 | dedup = dedup_bam.out.bam 55 | dedupbai = dedup_bam.out.bai 56 | deduplog = dedup_bam.out.logs 57 | } 58 | 59 | 60 | -------------------------------------------------------------------------------- /tests/test_Utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from MONSDA.Utils import NestedDefaultDict, rmempty, comment_remover, dict_inst, get_from_dict, yield_from_dict 3 | 4 | class TestUtils(unittest.TestCase): 5 | 6 | def test_NestedDefaultDict(self): 7 | # Test initialization and basic functionality 8 | nested_dict = NestedDefaultDict(lambda: NestedDefaultDict(int)) 9 | nested_dict['a']['b'] = 1 10 | self.assertEqual(nested_dict['a']['b'], 1) 11 | # Test default factory 12 | self.assertEqual(nested_dict['c']['d'], 0) 13 | 14 | def test_rmempty(self): 15 | # Assuming rmempty function removes empty directories 16 | # This test might need to create temporary directories and files to fully test rmempty functionality 17 | pass 18 | 19 | def test_comment_remover(self): 20 | # Assuming comment_remover function removes comments from a list of strings 21 | input_text = ["code line 1", "# this is a comment", "code line 2 # inline comment"] 22 | 
expected_output = ["code line 1", "code line 2 "] 23 | self.assertEqual(comment_remover(input_text), expected_output) 24 | 25 | def test_dict_inst(self): 26 | # Assuming dict_inst function checks if an instance is a dictionary 27 | self.assertTrue(dict_inst({'key': 'value'})) 28 | self.assertFalse(dict_inst(['not', 'a', 'dict'])) 29 | 30 | def test_get_from_dict(self): 31 | # Assuming get_from_dict function retrieves a value from a nested dictionary using a list of keys 32 | data_dict = {'a': {'b': {'c': 'd'}}} 33 | map_list = ['a', 'b', 'c'] 34 | self.assertEqual(get_from_dict(data_dict, map_list), 'd') 35 | 36 | def test_yield_from_dict(self): 37 | # Assuming yield_from_dict function yields items from a dictionary that match a given key 38 | data_dict = {'a': 1, 'b': 2, 'c': {'a': 3, 'b': 4}} 39 | key = 'a' 40 | expected_output = [1, 3] 41 | self.assertEqual(list(yield_from_dict(key, data_dict)), expected_output) 42 | 43 | if __name__ == '__main__': 44 | unittest.main() -------------------------------------------------------------------------------- /workflows/wip/pycoqc.smk: -------------------------------------------------------------------------------- 1 | rule pycoqc_raw: 2 | input: "FASTQ/{rawfile}.fastq.gz" 3 | output: report("QC/{rawfile}_pycoqc.zip", category="QC") 4 | log: "LOGS/{rawfile}/pycoqc_raw.log" 5 | conda: "../envs/qc.yaml" 6 | threads: 20 7 | params: dir=lambda w: expand("QC/{source}",source=source_from_sample(w.file)) 8 | shell: "for i in {input}; do OUT=$(dirname {output});pycoqc --quiet -o $OUT -t {threads} --noextract -f fastq {input} 2> {log};done" 9 | 10 | rule pycoqc_trimmed: 11 | input: "TRIMMED_FASTQ/{rawfile}_trimmed.fastq.gz", 12 | "QC/{rawfile}_pycoqc.zip" 13 | output: report("QC/{rawfile}_trimmed_pycoqc.zip", category="QC") 14 | log: "LOGS/{rawfile}/pycoqc_trimmed.log" 15 | conda: "../envs/qc.yaml" 16 | threads: 20 17 | params: dir=lambda w: expand("QC/{source}",source=source_from_sample(w.file)) 18 | shell: "for i in {input[0]}; do OUT=$(dirname {output});pycoqc --quiet -o $OUT -t {threads} --noextract -f fastq {input[0]} 2> {log};done" 19 | 20 | rule pyqc_mapped: 21 | input: "SORTED_MAPPED/{file}_mapped_sorted.sam.gz" 22 | output: report("QC/{file}_mapped_sorted_pycoqc.zip", category="QC") 23 | log: "LOGS/{file}/pycoqc_mapped.log" 24 | params: dir=lambda w: expand("QC/{source}",source=source_from_sample(w.file)) 25 | conda: "../envs/qc.yaml" 26 | threads: 20 27 | shell: "for i in {input}; do OUT=$(dirname {output});pycoqc --quiet -o $OUT -t {threads} --noextract -f sam_mapped {input} 2> {log};done" 28 | 29 | rule pyqc_uniquemapped: 30 | input: "UNIQUE_MAPPED/{file}_mapped_sorted_unique.bam", 31 | "UNIQUE_MAPPED/{file}_mapped_sorted_unique.bam.bai" 32 | output: report("QC/{file}_mapped_sorted_unique_pycoqc.zip", category="QC") 33 | log: "LOGS/{file}/pycoqc_uniquemapped.log" 34 | conda: "../envs/qc.yaml" 35 | threads: 20 36 | params: dir=lambda w: expand("QC/{source}",source=source_from_sample(w.file)) 37 | # params: dir=expand("QC/{source}",source=SOURCE) 38 | shell: "for i in {input[0]}; do OUT=$(dirname {output});pycoqc --quiet -o $OUT -t {threads} --noextract -f bam {input[0]} 2> {log};done" 39 | -------------------------------------------------------------------------------- /profile_snakemake/slurm-submit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Snakemake SLURM submit script. 
4 | """ 5 | import warnings # use warnings.warn() rather than print() to output info in this script 6 | 7 | from snakemake.utils import read_job_properties 8 | 9 | import slurm_utils 10 | 11 | # cookiecutter arguments 12 | SBATCH_DEFAULTS = """ """ 13 | CLUSTER_CONFIG = "cluster_config.yaml" 14 | ADVANCED_ARGUMENT_CONVERSION = {"yes": True, "no": False}[ 15 | "no" 16 | ] 17 | 18 | RESOURCE_MAPPING = { 19 | "time": ("time", "runtime", "walltime"), 20 | "mem": ("mem", "mem_mb", "ram", "memory"), 21 | "mem-per-cpu": ("mem-per-cpu", "mem_per_cpu", "mem_per_thread"), 22 | "nodes": ("nodes", "nnodes"), 23 | } 24 | 25 | # parse job 26 | jobscript = slurm_utils.parse_jobscript() 27 | job_properties = read_job_properties(jobscript) 28 | 29 | sbatch_options = {} 30 | cluster_config = slurm_utils.load_cluster_config(CLUSTER_CONFIG) 31 | 32 | # 1) sbatch default arguments 33 | sbatch_options.update(slurm_utils.parse_sbatch_defaults(SBATCH_DEFAULTS)) 34 | 35 | # 2) cluster_config defaults 36 | sbatch_options.update(cluster_config["__default__"]) 37 | 38 | # 3) Convert resources (no unit conversion!) and threads 39 | sbatch_options.update( 40 | slurm_utils.convert_job_properties(job_properties, RESOURCE_MAPPING) 41 | ) 42 | 43 | # 4) cluster_config for particular rule 44 | sbatch_options.update(cluster_config.get(job_properties.get("rule"), {})) 45 | 46 | # 5) cluster_config options 47 | sbatch_options.update(job_properties.get("cluster", {})) 48 | 49 | # 6) Advanced conversion of parameters 50 | if ADVANCED_ARGUMENT_CONVERSION: 51 | sbatch_options = slurm_utils.advanced_argument_conversion(sbatch_options) 52 | 53 | # 7) Format pattern in snakemake style 54 | sbatch_options = slurm_utils.format_values(sbatch_options, job_properties) 55 | 56 | # ensure sbatch output dirs exist 57 | for o in ("output", "error"): 58 | slurm_utils.ensure_dirs_exist(sbatch_options[o]) if o in sbatch_options else None 59 | 60 | # submit job and echo id back to Snakemake (must be the only stdout) 61 | print(slurm_utils.submit_job(jobscript, **sbatch_options)) 62 | -------------------------------------------------------------------------------- /profile_snakemake/slurm-status.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import re 3 | import subprocess as sp 4 | import shlex 5 | import sys 6 | import time 7 | import logging 8 | 9 | logger = logging.getLogger("__name__") 10 | 11 | STATUS_ATTEMPTS = 20 12 | 13 | jobid = sys.argv[1] 14 | 15 | # noqa: E999,E225 16 | cluster = "--cluster=cluster" 17 | # noqa: E225 18 | 19 | for i in range(STATUS_ATTEMPTS): 20 | try: 21 | sacct_res = sp.check_output(shlex.split(f"sacct {cluster} -P -b -j {jobid} -n")) 22 | res = { 23 | x.split("|")[0]: x.split("|")[1] 24 | for x in sacct_res.decode().strip().split("\n") 25 | } 26 | break 27 | except sp.CalledProcessError as e: 28 | logger.error("sacct process error") 29 | logger.error(e) 30 | except IndexError as e: 31 | pass 32 | # Try getting job with scontrol instead in case sacct is misconfigured 33 | try: 34 | sctrl_res = sp.check_output( 35 | shlex.split(f"scontrol {cluster} -o show job {jobid}") 36 | ) 37 | m = re.search(r"JobState=(\w+)", sctrl_res.decode()) 38 | res = {jobid: m.group(1)} 39 | break 40 | except sp.CalledProcessError as e: 41 | logger.error("scontrol process error") 42 | logger.error(e) 43 | if i >= STATUS_ATTEMPTS - 1: 44 | print("failed") 45 | exit(0) 46 | else: 47 | time.sleep(1) 48 | 49 | status = res[jobid] 50 | 51 | if status == "BOOT_FAIL": 52 | 
print("failed") 53 | elif status == "OUT_OF_MEMORY": 54 | print("failed") 55 | elif status.startswith("CANCELLED"): 56 | print("failed") 57 | elif status == "COMPLETED": 58 | print("success") 59 | elif status == "DEADLINE": 60 | print("failed") 61 | elif status == "FAILED": 62 | print("failed") 63 | elif status == "NODE_FAIL": 64 | print("failed") 65 | elif status == "PREEMPTED": 66 | print("failed") 67 | elif status == "TIMEOUT": 68 | print("failed") 69 | # Unclear whether SUSPENDED should be treated as running or failed 70 | elif status == "SUSPENDED": 71 | print("failed") 72 | else: 73 | print("running") 74 | -------------------------------------------------------------------------------- /workflows/trimgalore.nf: -------------------------------------------------------------------------------- 1 | TRIMENV=get_always('TRIMMINGENV') 2 | TRIMBIN=get_always('TRIMMINGBIN') 3 | 4 | TRIMPARAMS = get_always('trimgalore_params_TRIM') ?: '' 5 | //int cores = min(THREADS,4) 6 | //TRIMMING PROCESSES 7 | 8 | process trim{ 9 | conda "$TRIMENV"+".yaml" 10 | cpus 4//cores 11 | //validExitStatus 0,1 12 | 13 | publishDir "${workflow.workDir}/../" , mode: 'link', 14 | saveAs: {filename -> 15 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/_val_\d{1}|_trimmed|_dedup/,"")}_trimmed.fastq.gz" 16 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/.fastq.gz/,"")}_trimming_report.txt" 17 | else if (filename.indexOf(".log") >0) "LOGS/${COMBO}/${CONDITION}/TRIMMING/${file(filename).getSimpleName()}.log" 18 | else null 19 | } 20 | 21 | input: 22 | path reads 23 | 24 | output: 25 | path "*_trimmed.fastq.gz", emit: trim 26 | path "*trimming_report.txt", emit: rep 27 | 28 | script: 29 | if (PAIRED == 'paired'){ 30 | r1 = reads[0] 31 | r2 = reads[1] 32 | """ 33 | $TRIMBIN --cores ${task.cpus} --paired --gzip $TRIMPARAMS $r1 $r2 &> trim.log && rename 's/_dedup//g' *.fq.gz && rename 's/_R([1|2])_val_([1|2]).fq.gz/_R\\1_trimmed.fastq.gz/g' *.fq.gz && rename 's/.fastq.gz_trimming/_trimming/g' *.txt 34 | """ 35 | } 36 | else{ 37 | """ 38 | $TRIMBIN --cores ${task.cpus} --gzip $TRIMPARAMS $reads &> trim.log && rename 's/_dedup//g' *.fq.gz && rename 's/.fq.gz/.fastq.gz/g' *.fq.gz && rename 's/.fastq.gz_trimming/_trimming/g' *.txt 39 | """ 40 | } 41 | } 42 | 43 | workflow TRIMMING{ 44 | take: 45 | collection 46 | 47 | main: 48 | //check = collection.toList() 49 | if ( PREDEDUP == 'enabled' ){ // && !check.contains('MONSDA.log')){ 50 | trim(collection) 51 | }else { 52 | if (PAIRED == 'paired'){ 53 | trim(samples_ch.collate(2)) 54 | } else{ 55 | trim(samples_ch.collate(1)) 56 | } 57 | } 58 | 59 | emit: 60 | trimmed = trim.out.trim 61 | report = trim.out.rep 62 | } 63 | -------------------------------------------------------------------------------- /workflows/ciri2.nf: -------------------------------------------------------------------------------- 1 | CIRCENV = get_always('CIRCSENV') 2 | CIRCBIN = get_always('CIRCSBIN') 3 | CIRCREF = get_always('CIRCSREF') 4 | CIRCREFDIR = "${workflow.workDir}/../"+get_always('CIRCSREFDIR') 5 | CIRCANNO = get_always('CIRCSANNO') 6 | 7 | CIRCPARAMS = get_always('ciri2_params_CIRC') ?: '' 8 | 9 | //CIRCS PROCESSES 10 | 11 | process ciri2{ 12 | conda "$CIRCENV"+".yaml" 13 | cpus THREADS 14 | cache 'lenient' 15 | //validExitStatus 0,1 16 | 17 | publishDir "${workflow.workDir}/../" , mode: 'link', 18 | saveAs: {filename -> 19 | if 
(filename.indexOf("_circs") > 0) "CIRCS/${SCOMBO}/${CONDITION}/${file(filename).getSimpleName()}" 20 | else if (filename.indexOf(".log") > 0) "LOGS/${SCOMBO}/${CONDITION}/${file(filename).getSimpleName()}" 21 | } 22 | 23 | input: 24 | path fls 25 | 26 | output: 27 | path "*_circs", emit: circs 28 | path "log", emit: log 29 | 30 | script: 31 | ref = fls[0] 32 | anno = fls[1] 33 | reads = fls[2] 34 | fn = file(reads).getSimpleName() 35 | oc = fn+"_circs" 36 | ol = fn+".log" 37 | sortmem = '30%' 38 | 39 | """ 40 | set +o pipefail; export LC_ALL=C; if [[ -n \"\$(zcat ${reads} | head -c 1 | tr \'\\0\\n\' __)\" ]] ;then mkdir -p TMP && zcat ${reads}|samtools sort -n -@ ${task.cpus} -u -O sam -T TMP > ${fn}_tmp.sam && zcat ${anno} > ${fn}_tmp.gtf && zcat ${ref} > ${fn}_tmp.fa && perl $CIRCBIN -I ${fn}_tmp.sam -O ${fn}_circs -F ${fn}_tmp.fa -T ${task.cpus} -A ${fn}_tmp.gtf -G log $CIRCPARAMS &>> log; else gzip < /dev/null > ${fn}_circs; echo \"File ${reads} empty\" >> log; fi; touch CIRIerror.log && cat CIRIerror.log >> {log} && echo '' > CIRIerror.log && touch ${fn}_circs 41 | """ 42 | } 43 | 44 | workflow CIRCS{ 45 | take: collection 46 | 47 | main: 48 | 49 | MAPPEDSAMPLES = LONGSAMPLES.collect{ 50 | element -> return "${workflow.workDir}/../MAPPED/${COMBO}/"+element+"*_mapped_sorted.sam.gz" 51 | } 52 | 53 | mapsamples_ch = Channel.fromPath(MAPPEDSAMPLES.sort()) 54 | annofile = Channel.fromPath(CIRCANNO) 55 | genomefile = Channel.fromPath(CIRCREF) 56 | 57 | ciri2(genomefile.combine(annofile.combine(mapsamples_ch.collate(1)))) 58 | 59 | emit: 60 | circs = ciri2.out.circs 61 | logs = ciri2.out.log 62 | } -------------------------------------------------------------------------------- /workflows/sra.nf: -------------------------------------------------------------------------------- 1 | FETCHENV=get_always('FETCHENV') 2 | FETCHBIN=get_always('FETCHBIN') 3 | 4 | FETCHPARAMS = get_always('sra_params_PREFETCH') ?: '' 5 | DOWNPARAMS = get_always('sra_params_DOWNLOAD') ?: '' 6 | 7 | 8 | //FETCH PROCESSES 9 | 10 | process prefetch_sra{ 11 | conda "$FETCHENV"+".yaml" 12 | cpus THREADS 13 | cache 'lenient' 14 | //validExitStatus 0,1 15 | 16 | publishDir "${workflow.workDir}/../" , mode: 'link', 17 | saveAs: {filename -> 18 | if (filename.indexOf(".log") >0) "LOGS/$CONDITION/FETCH/Prefetch_SRA.log" 19 | else null 20 | } 21 | 22 | input: 23 | val reads 24 | 25 | output: 26 | path "*.sra", emit: sra 27 | 28 | script: 29 | fn = reads+".sra" 30 | """ 31 | export NCBI_SETTINGS=\"$FETCHPARAMS\" 32 | prefetch $reads -o $fn &> prefetch.log 33 | """ 34 | } 35 | 36 | process download_sra{ 37 | conda "$FETCHENV"+".yaml" 38 | cpus THREADS 39 | cache 'lenient' 40 | //validExitStatus 0,1 41 | 42 | publishDir "${workflow.workDir}/../" , mode: 'link', 43 | saveAs: {filename -> 44 | if (filename.indexOf(".fastq.gz") > 0) "FASTQ/$CONDITION/${file(filename).getSimpleName()}.fastq.gz" 45 | else if (filename.indexOf(".log") >0) "LOGS/$CONDITION/FETCH/SRA.log" 46 | else null 47 | } 48 | 49 | input: 50 | path sras 51 | 52 | output: 53 | path "*fastq.gz", emit: fq 54 | 55 | script: 56 | if (PAIRED == 'paired'){ 57 | """ 58 | export NCBI_SETTINGS=\"$FETCHPARAMS\" 59 | fasterq-dump -e ${task.cpus} $DOWNPARAMS --split-files $sras &> sra.log ; rename 's/(.sra)*_([1|2])/_R\$2/' *.fastq; for i in *.fastq;do pigz -p ${task.cpus} \$i;done 60 | """ 61 | } 62 | else{ 63 | """ 64 | export NCBI_SETTINGS=\"$FETCHPARAMS\" 65 | fasterq-dump -e ${task.cpus} $DOWNPARAMS $sras &> sra.log ; rename 's/(.sra)*_([1|2])/_R\$2/' *.fastq ; for i in 
*.fastq;do pigz -p ${task.cpus} \$i;done 66 | """ 67 | } 68 | } 69 | 70 | workflow FETCH{ 71 | take: collection 72 | 73 | main: 74 | //SAMPLE CHANNELS 75 | samples_ch = Channel.of(SHORTSAMPLES) 76 | 77 | prefetch_sra(samples_ch) 78 | download_sra(prefetch_sra.out.sra) 79 | 80 | emit: 81 | fetched = download_sra.out.fq 82 | } 83 | -------------------------------------------------------------------------------- /workflows/manipulate_genome.smk: -------------------------------------------------------------------------------- 1 | rule UnzipGenome: 2 | input: ref = REFERENCE, 3 | output: fa = expand("{ref}.fa", ref=REFERENCE.replace('.fa.gz', '')), 4 | fai = expand("{ref}.fa.fai", ref=REFERENCE.replace('.fa.gz', '')), 5 | fas = expand("{ref}.chrom.sizes", ref=REFERENCE.replace('.fa.gz', '')) 6 | log: expand("LOGS/{combo}/indexfa.log", combo=combo) 7 | conda: "samtools.yaml" 8 | threads: 1 9 | params: bins = BINS 10 | shell: "set +o pipefail; zcat {input[0]} |perl -F\\\\040 -wane 'if($_ =~ /^>/){{chomp($F[0]);print \"\\n\".$F[0].\"\\n\"}} else{{($line=$_)=~s/\\r[\\n]*/\\n/gm; chomp($line=$_); print $line}}' |tail -n+2 > {output.fa} && {params.bins}/Preprocessing/indexfa.sh {output.fa} 2> {log} && cut -f1,2 {output.fai} > {output.fas}" 11 | #shell: "set +o pipefail; zcat {input[0]} |perl -F\\\\040 -wane 'if($_ =~ /^>/){{$F[0] = $F[0] =~ /^>chr/ ? $F[0] : \">chr\".substr($F[0],1);chomp($F[0]);print \"\\n\".$F[0].\"\\n\"}} else{{($line=$_)=~s/\\r[\\n]*/\\n/gm; chomp($line=$_); print $line}}' |tail -n+2 > {output.fa} && {params.bins}/Preprocessing/indexfa.sh {output.fa} 2> {log} && cut -f1,2 {output.fai} > {output.fas}" 12 | 13 | rule UnzipGenome_no_us: 14 | input: ref = REFERENCE, 15 | output: fa = expand("{ref}_us.fa", ref=REFERENCE.replace('.fa.gz', '')), 16 | fai = expand("{ref}_us.fa.fai", ref=REFERENCE.replace('.fa.gz', '')), 17 | fas = expand("{ref}_us.chrom.sizes", ref=REFERENCE.replace('.fa.gz', '')) 18 | log: expand("LOGS/{combo}/indexfa_us.log", combo=combo) 19 | conda: "samtools.yaml" 20 | threads: 1 21 | params: bins = BINS 22 | shell: "set +o pipefail; zcat {input[0]} |perl -F\\\\040 -wane 'if($_ =~ /^>/){{$F[0] = $F[0] =~ /^>chr/ ? $F[0] : \">chr\".substr($F[0],1))=~ s/\_/\./g;chomp($F[0]);print \"\\n\".$F[0].\"\\n\"}} else{{($line=$_)=~s/\\r[\\n]*/\\n/gm; chomp($line=$_); print $line}}' |tail -n+2 > {output.fa} && {params.bins}/Preprocessing/indexfa.sh {output.fa} 2> {log} && cut -f1,2 {output.fai} > {output.fas}" 23 | #shell: "set +o pipefail; zcat {input[0]} |perl -F\\\\040 -wane 'if($_ =~ /^>/){{$F[0] = $F[0] =~ /^>chr/ ? 
$F[0] : \">chr\".substr($F[0],1))=~ s/\_/\./g;chomp($F[0]);print \"\\n\".$F[0].\"\\n\"}} else{{($line=$_)=~s/\\r[\\n]*/\\n/gm; chomp($line=$_); print $line}}' |tail -n+2 > {output.fa} && {params.bins}/Preprocessing/indexfa.sh {output.fa} 2> {log} && cut -f1,2 {output.fai} > {output.fas}" 24 | -------------------------------------------------------------------------------- /workflows/fastqc_dedup.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | // RAW QC 6 | process qc_raw{ 7 | conda "$QCENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 15 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 16 | else null 17 | } 18 | 19 | input: 20 | path read 21 | 22 | output: 23 | path "*.{zip,html}", emit: fastqc_results 24 | 25 | script: 26 | """ 27 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 28 | """ 29 | } 30 | 31 | workflow QC_RAW{ 32 | take: collection 33 | 34 | main: 35 | //SAMPLE CHANNELS 36 | if (PAIRED == 'paired'){ 37 | SAMPLES = SAMPLES.collect{ 38 | element -> return "${workflow.workDir}/../FASTQ/"+element+"_{R2,R1}.*fastq.gz" 39 | } 40 | }else{ 41 | SAMPLES=SAMPLES.collect{ 42 | element -> return "${workflow.workDir}/../FASTQ/"+element+".*fastq.gz" 43 | } 44 | } 45 | 46 | samples_ch = Channel.fromPath(SAMPLES.sort()) 47 | 48 | qc_raw(samples_ch.collect()) 49 | 50 | emit: 51 | qc = qc_raw.out.fastqc_results 52 | } 53 | 54 | 55 | // DEDUP QC 56 | 57 | process qc_dedup{ 58 | conda "$QCENV"+".yaml" 59 | cpus THREADS 60 | cache 'lenient' 61 | //validExitStatus 0,1 62 | 63 | publishDir "${workflow.workDir}/../" , mode: 'link', 64 | saveAs: {filename -> 65 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 66 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 67 | else null 68 | } 69 | 70 | input: 71 | path read 72 | 73 | output: 74 | path "*.{zip,html}", emit: fastqc_results 75 | 76 | script: 77 | """ 78 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 79 | """ 80 | } 81 | 82 | workflow QC_DEDUP{ 83 | take: collection 84 | 85 | main: 86 | 87 | qc_dedup(collection.collect()) 88 | 89 | emit: 90 | qc = qc_dedup.out.fastqc_results 91 | } 92 | -------------------------------------------------------------------------------- /workflows/fastqc_trim.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | // RAW QC 6 | process qc_raw{ 7 | conda "$QCENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 15 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 16 | else null 17 | } 18 | 19 | input: 20 | path read 21 | 22 | output: 23 | path "*.{zip,html}", emit: fastqc_results 24 | 25 | script: 26 | """ 27 | fastqc --quiet -t 
${task.cpus} $QCPARAMS --noextract -f fastq $read 28 | """ 29 | } 30 | 31 | workflow QC_RAW{ 32 | take: collection 33 | 34 | main: 35 | //SAMPLE CHANNELS 36 | if (PAIRED == 'paired'){ 37 | SAMPLES = SAMPLES.collect{ 38 | element -> return "${workflow.workDir}/../FASTQ/"+element+"_{R2,R1}.*fastq.gz" 39 | } 40 | }else{ 41 | SAMPLES=SAMPLES.collect{ 42 | element -> return "${workflow.workDir}/../FASTQ/"+element+".*fastq.gz" 43 | } 44 | } 45 | 46 | samples_ch = Channel.fromPath(SAMPLES.sort()) 47 | 48 | qc_raw(samples_ch.collect()) 49 | 50 | emit: 51 | qc = qc_raw.out.fastqc_results 52 | } 53 | 54 | // TRIMMED QC 55 | 56 | process qc_trimmed{ 57 | conda "$QCENV"+".yaml" 58 | cpus THREADS 59 | cache 'lenient' 60 | //validExitStatus 0,1 61 | 62 | publishDir "${workflow.workDir}/../" , mode: 'link', 63 | saveAs: {filename -> 64 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 65 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 66 | else null 67 | } 68 | 69 | input: 70 | //val collect 71 | path read 72 | 73 | output: 74 | path "*.{zip,html}", emit: fastqc_results 75 | 76 | script: 77 | """ 78 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 79 | """ 80 | } 81 | 82 | workflow QC_TRIMMING{ 83 | take: collection 84 | 85 | main: 86 | 87 | qc_trimmed(collection.collect()) 88 | 89 | emit: 90 | qc = qc_trimmed.out.fastqc_results 91 | } 92 | -------------------------------------------------------------------------------- /scripts/Analysis/DAS/FeatureCounts2DIEGO.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use autodie; 5 | use Getopt::Long; 6 | use vars qw ($help $inlist $outfile $annotation); 7 | 8 | $outfile="junction_table_dexgo"; 9 | GetOptions ( 10 | "i=s" => \$inlist, 11 | "h" => \$help, 12 | "o=s" => \$outfile, 13 | "a=s" => \$annotation 14 | ); 15 | usage() if ($help || !$inlist); 16 | 17 | open(my $IN, "<", $inlist); 18 | my @files; 19 | my @names; 20 | my $count=0; 21 | 22 | while(<$IN>) { 23 | chomp(my $line = $_); 24 | next if ($line =~ /^#/); 25 | my @F = split("\t",$line); 26 | $names[$count]=$F[0] if $F[0]; 27 | $files[$count]=$F[1] if $F[1]; 28 | $count++; 29 | } 30 | 31 | die "$#names not $#files!" 
if ($#names != $#files or $#names <1 ) ; 32 | 33 | #my $filelist=join(" ",@files); 34 | my $headerlist=join("\t",@names); 35 | close($IN); 36 | 37 | #my %annotation; 38 | #if($annotation) { 39 | # open($IN, "<",$annotation); 40 | #} 41 | 42 | open(my $OUT, ">>", $outfile); 43 | print $OUT "junction\ttype\t$headerlist\tgeneID\tgeneName\n"; 44 | 45 | my $oldid=""; 46 | my $zaehl=0; 47 | $count=0; 48 | 49 | my @countfiles = @files;#split(" ",$filelist); 50 | 51 | while (my $f = shift(@countfiles)){ 52 | open($IN,'<',$f); 53 | while(<$IN>) { 54 | next if ($_ =~ /^#/); 55 | my @F=split; 56 | $F[0]=~/(\S+):\d+$/; 57 | my $geneid=$1; 58 | if($geneid ne $oldid) { 59 | $count++; 60 | } 61 | $oldid=$geneid; 62 | $zaehl+=100; 63 | my $z2=$zaehl+50; 64 | print $OUT "chrfoo:$zaehl"."-$z2\tN_w"; 65 | for (my $i=1; $i<=$#F; $i+=2) { 66 | print $OUT "\t$F[$i]"; 67 | } 68 | print $OUT "\t$geneid\tbar$count\n"; 69 | } 70 | close($OUT); 71 | } 72 | 73 | printf STDERR "You now should hav a file $outfile to play with\nThank you for travelling with us, Good bye!\n"; 74 | 75 | 76 | sub usage { 77 | print STDERR "\nHTseq2DIEGO.pl\n"; 78 | print STDERR "usage: HTseq2DIEGO.pl -i [OPTIONS]\n"; 79 | print STDERR "\n"; 80 | print STDERR "[INPUT]\n"; 81 | print STDERR " -i file containing input files and ids\n \t\tid [tab] path.to/file\n"; 82 | print STDERR " -o output file name (default:junction_table_dexdas )\n"; 83 | print STDERR " -h this (usefull) help message\n"; 84 | print STDERR "[VERSION]\n"; 85 | print STDERR " 06-25-2012\n"; 86 | print STDERR "[BUGS]\n"; 87 | print STDERR " Please report bugs to salzamt\@bioinf.uni-leipzig.de\n"; 88 | print STDERR "\n"; 89 | exit(-1); 90 | } 91 | -------------------------------------------------------------------------------- /workflows/cutadapt.nf: -------------------------------------------------------------------------------- 1 | TRIMENV=get_always('TRIMMINGENV') 2 | TRIMBIN=get_always('TRIMMINGBIN') 3 | 4 | TRIMPARAMS = get_always('cutadapt_params_TRIM') ?: '' 5 | //int cores = min(THREADS,4) 6 | //TRIMMING PROCESSES 7 | 8 | process trim{ 9 | conda "$TRIMENV"+".yaml" 10 | cpus 4//cores 11 | //validExitStatus 0,1 12 | 13 | publishDir "${workflow.workDir}/../" , mode: 'link', 14 | saveAs: {filename -> 15 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/_val_\d{1}|_trimmed|_dedup/,"")}_trimmed.fastq.gz" 16 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}" 17 | else if (filename.indexOf(".log") >0) "LOGS/${COMBO}/${CONDITION}/TRIMMING/${file(filename).getSimpleName()}.log" 18 | else null 19 | } 20 | 21 | input: 22 | path reads 23 | 24 | output: 25 | path "*_trimmed.fastq.gz", emit: trim 26 | path "*trimming_report.txt", emit: rep 27 | 28 | script: 29 | if (PAIRED == 'paired'){ 30 | r1 = reads[0] 31 | r2 = reads[1] 32 | o = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 33 | p = file(r2).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 34 | r = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 35 | """ 36 | $TRIMBIN --cores ${task.cpus} $TRIMPARAMS -o $o -p $p $r1 $r2 &> $r 37 | """ 38 | } 39 | else{ 40 | o = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 41 | r = 
file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 42 | """ 43 | $TRIMBIN --cores ${task.cpus} $TRIMPARAMS -o $o $reads &> $r 44 | """ 45 | } 46 | } 47 | 48 | workflow TRIMMING{ 49 | take: 50 | collection 51 | 52 | main: 53 | //check = collection.toList() 54 | if ( PREDEDUP == 'enabled' ){ // && !check.contains('MONSDA.log')){ 55 | trim(collection) 56 | }else { 57 | if (PAIRED == 'paired'){ 58 | trim(samples_ch.collate(2)) 59 | } else{ 60 | trim(samples_ch.collate(1)) 61 | } 62 | } 63 | 64 | emit: 65 | trimmed = trim.out.trim 66 | report = trim.out.rep 67 | } 68 | -------------------------------------------------------------------------------- /MONSDA/lib/Collection.groovy: -------------------------------------------------------------------------------- 1 | //import groovy.json.JsonSlurper 2 | 3 | def nfcoreHeader() { 4 | // Log colors ANSI codes 5 | c_black = params.monochrome_logs ? '' : "\033[0;30m"; 6 | c_blue = params.monochrome_logs ? '' : "\033[0;34m"; 7 | c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; 8 | c_dim = params.monochrome_logs ? '' : "\033[2m"; 9 | c_green = params.monochrome_logs ? '' : "\033[0;32m"; 10 | c_purple = params.monochrome_logs ? '' : "\033[0;35m"; 11 | c_reset = params.monochrome_logs ? '' : "\033[0m"; 12 | c_white = params.monochrome_logs ? '' : "\033[0;37m"; 13 | c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; 14 | 15 | return """ -${c_dim}--------------------------------------------------${c_reset}- 16 | ${c_green},--.${c_black}/${c_green},-.${c_reset} 17 | ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} 18 | ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} 19 | ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} 20 | ${c_green}`._,._,\'${c_reset} 21 | ${c_purple} nf-core/mapping v${workflow.manifest.version}${c_reset} 22 | -${c_dim}--------------------------------------------------${c_reset}- 23 | """.stripIndent() 24 | } 25 | 26 | def checkHostname() { 27 | def c_reset = params.monochrome_logs ? '' : "\033[0m" 28 | def c_white = params.monochrome_logs ? '' : "\033[0;37m" 29 | def c_red = params.monochrome_logs ? '' : "\033[1;91m" 30 | def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" 31 | if (params.hostnames) { 32 | def hostname = "hostname".execute().text.trim() 33 | params.hostnames.each { prof, hnames -> 34 | hnames.each { hname -> 35 | if (hostname.contains(hname) && !workflow.profile.contains(prof)) { 36 | log.error "====================================================\n" + 37 | " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + 38 | " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + 39 | " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + 40 | "============================================================" 41 | } 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /workflows/bbduk.nf: -------------------------------------------------------------------------------- 1 | TRIMENV=get_always('TRIMMINGENV') 2 | TRIMBIN=get_always('TRIMMINGBIN') 3 | 4 | TRIMPARAMS = get_always('bbduk_params_TRIM') ?: '' 5 | //int cores = min(THREADS,4) 6 | //TRIMMING PROCESSES 7 | 8 | process trim{ 9 | conda "$TRIMENV"+".yaml" 10 | cpus 4//cores 11 | //validExitStatus 0,1 12 | 13 | publishDir "${workflow.workDir}/../" , mode: 'link', 14 | saveAs: {filename -> 15 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/_val_\d{1}|_trimmed|_dedup/,"")}_trimmed.fastq.gz" 16 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/.fastq.gz/,"")}_trimming_report.txt" 17 | else if (filename.indexOf(".log") >0) "LOGS/${COMBO}/${CONDITION}/TRIMMING/${file(filename).getSimpleName()}.log" 18 | else null 19 | } 20 | 21 | input: 22 | path reads 23 | 24 | output: 25 | path "*_trimmed.fastq.gz", emit: trim 26 | path "*trimming_report.txt", emit: rep 27 | 28 | script: 29 | if (PAIRED == 'paired'){ 30 | r1 = reads[0] 31 | r2 = reads[1] 32 | o = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 33 | p = file(r2).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 34 | r = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 35 | """ 36 | $TRIMBIN $TRIMPARAMS t=${task.cpus} in1=$r1 in2=$r2 out1=$o out2=$p &> $r 37 | """ 38 | } 39 | else{ 40 | o = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 41 | r = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 42 | """ 43 | $TRIMBIN $TRIMPARAMS t=${task.cpus} in=$r1 out=$o &> $r 44 | """ 45 | } 46 | } 47 | 48 | workflow TRIMMING{ 49 | take: 50 | collection 51 | 52 | main: 53 | //check = collection.toList() 54 | if ( PREDEDUP == 'enabled' ){ // && !check.contains('MONSDA.log')){ 55 | trim(collection) 56 | }else { 57 | if (PAIRED == 'paired'){ 58 | trim(samples_ch.collate(2)) 59 | } else{ 60 | trim(samples_ch.collate(1)) 61 | } 62 | } 63 | 64 | emit: 65 | trimmed = trim.out.trim 66 | report = trim.out.rep 67 | } 68 | -------------------------------------------------------------------------------- /workflows/fastp.nf: -------------------------------------------------------------------------------- 1 | TRIMENV=get_always('TRIMMINGENV') 2 | TRIMBIN=get_always('TRIMMINGBIN') 3 | 4 | TRIMPARAMS = get_always('fastp_params_TRIM') ?: '' 5 | //int cores = min(THREADS,4) 6 | //TRIMMING PROCESSES 7 | 8 | process trim{ 9 | conda 
"$TRIMENV"+".yaml" 10 | cpus 4//cores 11 | //validExitStatus 0,1 12 | 13 | publishDir "${workflow.workDir}/../" , mode: 'link', 14 | saveAs: {filename -> 15 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/_val_\d{1}|_trimmed|_dedup/,"")}_trimmed.fastq.gz" 16 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/.fastq.gz/,"")}_trimming_report.txt" 17 | else if (filename.indexOf(".log") >0) "LOGS/${COMBO}/${CONDITION}/TRIMMING/${file(filename).getSimpleName()}.log" 18 | else null 19 | } 20 | 21 | input: 22 | path reads 23 | 24 | output: 25 | path "*_trimmed.fastq.gz", emit: trim 26 | path "*trimming_report.txt", emit: rep 27 | 28 | script: 29 | if (PAIRED == 'paired'){ 30 | r1 = reads[0] 31 | r2 = reads[1] 32 | o = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 33 | p = file(r2).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 34 | r = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 35 | """ 36 | $TRIMBIN $TRIMPARAMS --thread ${task.cpus} --in1 $r1 --in2 $r2 --out1 $o --out2 $p &> $r 37 | """ 38 | } 39 | else{ 40 | o = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 41 | r = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 42 | """ 43 | $TRIMBIN $TRIMPARAMS --threads ${task.cpus} --i $r1 --o $o &> $r 44 | """ 45 | } 46 | } 47 | 48 | workflow TRIMMING{ 49 | take: 50 | collection 51 | 52 | main: 53 | //check = collection.toList() 54 | if ( PREDEDUP == 'enabled' ){ // && !check.contains('MONSDA.log')){ 55 | trim(collection) 56 | }else { 57 | if (PAIRED == 'paired'){ 58 | trim(samples_ch.collate(2)) 59 | } else{ 60 | trim(samples_ch.collate(1)) 61 | } 62 | } 63 | 64 | emit: 65 | trimmed = trim.out.trim 66 | report = trim.out.rep 67 | } 68 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | MONSDA 2 | ====== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 2 7 | :caption: GETTING STARTED 8 | 9 | source/installation 10 | source/first 11 | 12 | .. toctree:: 13 | :hidden: 14 | :maxdepth: 2 15 | :caption: PREPARING YOUR PROJECT 16 | 17 | source/preparation 18 | source/configurator 19 | 20 | .. toctree:: 21 | :hidden: 22 | :maxdepth: 2 23 | :caption: EXECUTING MONSDA 24 | 25 | source/runsmk 26 | source/cluster 27 | 28 | 29 | .. toctree:: 30 | :hidden: 31 | :maxdepth: 2 32 | :caption: TUTORIAL 33 | 34 | source/tutorial 35 | 36 | 37 | .. toctree:: 38 | :hidden: 39 | :maxdepth: 2 40 | :caption: WORKFLOW AND TOOL OVERVIEW 41 | 42 | source/workflows 43 | 44 | 45 | .. toctree:: 46 | :hidden: 47 | :maxdepth: 2 48 | :caption: DETAILS 49 | 50 | source/wrapper 51 | source/conditiontree 52 | source/config 53 | 54 | .. toctree:: 55 | :hidden: 56 | :maxdepth: 2 57 | :caption: CONTRIBUTE 58 | 59 | source/integrate 60 | source/contribute 61 | 62 | 63 | Welcome to **MONSDA**, Modular Organizer of Nextflow and Snakemake driven hts Data Analysis 64 | 65 | Automizing HTS analysis from data download, preprocessing and mapping to postprocessing/analysis and track generation centered on a single config file. 
**MONSDA** can create **Snakemake** and **Nextflow** workflows centered on a user friendly, sharable **Json** config file and reproducible subworkflows. These workflows can either be saved to disk for manual inspection and execution or automatically executed. 66 | 67 | For details on **Snakemake** and **Nextflow** and their features please refer to the corresponding Snakemake_ or Nextflow_ documentation. 68 | 69 | .. _Snakemake: https://Snakemake.readthedocs.io/en/stable/tutorial/tutorial.html 70 | .. _Nextflow: https://www.Nextflow.io/docs/latest/index.html 71 | 72 | In general it is necessary to write a configuration file containing workflows to execute, information on paths, files to process and settings beyond default for mapping tools and others. 73 | The template on which **MONSDA** is based on can be found in the **config** directory. 74 | 75 | For **MONSDA** to be as FAIR as possible, one needs to use **conda** or the alternative **mamba**. For details on either please refer to the corresponding conda_ or mamba_ manual. 76 | 77 | .. _conda: https://docs.conda.io/en/latest/ 78 | .. _mamba: https://mamba.readthedocs.io/en/latest/ 79 | 80 | This workflow organizer makes heavy use of **conda** and especially the bioconda_ channel. 81 | 82 | .. _bioconda: https://bioconda.github.io 83 | -------------------------------------------------------------------------------- /scripts/Analysis/PreprocessPeaks.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #### use things ### 4 | use strict; 5 | use warnings; 6 | use autodie; 7 | use PerlIO::gzip; 8 | use Getopt::Long qw( :config posix_default bundling no_ignore_case ); 9 | use Cwd; 10 | ### use own modules 11 | use FindBin::Real qw(Bin); # locate this script 12 | use lib Bin() . "/../lib"; 13 | use Collection; 14 | 15 | my ( $dir, $odir, $peakfile, $filter); 16 | my $VERBOSE=0; 17 | pod2usage(-verbose => 0) 18 | unless GetOptions( 19 | "dir|d=s" => \$dir, 20 | "odir|o=s" => \$odir, 21 | "peak|p=s" => \$peakfile, 22 | "filter|f=s" => \$filter, 23 | "help|h" => sub{pod2usage(-verbose => 1)}, 24 | "man|m" => sub{pod2usage(-verbose => 2)}, 25 | "verbose" => sub{ $VERBOSE++ } 26 | ); 27 | 28 | #my $pwd = cwd(); 29 | $dir = cwd() unless ($dir); 30 | $odir =~ s/$odir/\Q$odir\E/g if($odir); 31 | $odir = "$dir"."\/Subpeaks" unless ($odir); 32 | $filter = qr(\Q$filter\E) if($filter); 33 | 34 | my $pid = $$; 35 | (my $job = `cat /proc/$pid/cmdline`)=~ s/\0/ /g; 36 | print STDERR $job,"\n"; 37 | 38 | chdir ("$dir"); 39 | 40 | #Read beds; 41 | push my @beds,split(',',$peakfile); 42 | 43 | print STDERR "Processing Bed File and annotating profile\n"; 44 | 45 | #parse bedfile 46 | my ($unique,$chl) = Collection::parse_bedgraph(\@beds);#, $unique, $chl); 47 | 48 | print STDERR "Printing bed with profile\n"; 49 | 50 | foreach my $pk (keys %{$unique}){ 51 | my @tempuni = split(/\_/,$pk); 52 | push @tempuni , split(/\_/,$unique->{$pk}); 53 | (my $chromosome = $tempuni[0])=~ s/=/\_/g; 54 | $chromosome =~ s/(:)+/_/g; 55 | my $start = $tempuni[1]; 56 | my $end = $tempuni[2]; 57 | my $strand = $tempuni[3]; 58 | my $name = $tempuni[4]; 59 | my $score = $tempuni[5]; 60 | my $summit = $tempuni[6]; 61 | my $rest = $tempuni[7]; 62 | 63 | if ($filter){ 64 | next if $name !~ /$filter/; 65 | } 66 | 67 | $strand = '.' 
if $strand eq 'u'; 68 | 69 | my $profile; 70 | for ($start..$end-1){ 71 | $profile->{$_}=$score; 72 | } 73 | my $area; 74 | my @tmp; 75 | for my $loci (sort{$a <=> $b} keys %{$profile}){ 76 | # push @tmp, join(':',$loci,$profile->{$loci}); 77 | $summit = $profile->{$loci} if ( $summit < $profile->{$loci} ); 78 | $area+=$profile->{$loci}; 79 | } 80 | if ($rest eq 'undef'){ 81 | $rest = $area; 82 | } 83 | else{ 84 | $rest=join('\t',$area,$rest); 85 | } 86 | # my $peakprofile = join("|",@tmp); 87 | my $peakprofile = ($end-$start).':'.$score; #Changing to more sparse peak profile 88 | print STDOUT "$chromosome\t$start\t$end\t$peakprofile\t$summit\t$strand\t$rest\n"; 89 | } 90 | -------------------------------------------------------------------------------- /workflows/manipulate_genome.nf: -------------------------------------------------------------------------------- 1 | process UnzipGenome{ 2 | conda "samtools.yaml" 3 | cpus 1 4 | cache 'lenient' 5 | //validExitStatus 0,1 6 | 7 | publishDir "${workflow.workDir}/../" , mode: 'link', 8 | saveAs: {filename -> 9 | if (filename.indexOf(".fa.fai") > 0) "${REFDIR}/${file(filename).getName()}" 10 | else if (filename.indexOf(".fa") > 0) "${REFDIR}/${file(filename).getName()}" 11 | else if (filename.indexOf(".chrom.sizes") > 0) "${REFDIR}/${file(filename).getName()}" 12 | else if (filename == "log") "LOGS/${SCOMBO}/${COMBO}_indexfa.log" 13 | } 14 | 15 | input: 16 | path ref 17 | 18 | output: 19 | path "*.fa", emit: unzipped 20 | path "*.fa.fai", emit: index 21 | path "*.chrom.sizes", emit: chromsize 22 | path "log", emit: log 23 | 24 | script: 25 | fa = ref.getSimpleName()+".fa" 26 | fai = ref.getSimpleName()+".fa.fai" 27 | cs = ref.getSimpleName()+".chrom.sizes" 28 | 29 | """ 30 | zcat $ref |perl -F'\\t' -wane 'if(\$_ =~ /^>/){{chomp(\$F[0]);print \"\\n\".\$F[0].\"\\n\"}} else{{(\$line=\$_)=~s/\\r[\\n]*/\\n/gm; chomp(\$line=\$_); print \$line}}' |tail -n+2 > $fa && $BINS/Preprocessing/indexfa.sh $fa 2> log && cut -f1,2 $fai > $cs 31 | """ 32 | } 33 | 34 | 35 | process UnzipGenome_no_us{ 36 | conda "samtools.yaml" 37 | cpus 1 38 | cache 'lenient' 39 | //validExitStatus 0,1 40 | 41 | publishDir "${workflow.workDir}/../" , mode: 'link', 42 | saveAs: {filename -> 43 | if (filename.indexOf("_us.fa.fai") > 0) "${REFDIR}/${file(filename).getName()}" 44 | else if (filename.indexOf("_us.fa") > 0) "${REFDIR}/${file(filename).getName()}" 45 | else if (filename.indexOf("_us.chrom.sizes") > 0) "${REFDIR}/${file(filename).getName()}" 46 | else if (filename == "log") "LOGS/${SCOMBO}/${COMBO}_indexfa_us.log" 47 | } 48 | 49 | input: 50 | path ref 51 | 52 | output: 53 | path "*.fa", emit: unzipped 54 | path "*.fa.fai", emit: index 55 | path "*.chrom.sizes", emit: chromsize 56 | path "log", emit: log 57 | 58 | script: 59 | fa = ref.getSimpleName()+"_us.fa" 60 | fai = ref.getSimpleName()+"_us.fa.fai" 61 | cs = ref.getSimpleName()+"_us.chrom.sizes" 62 | 63 | """ 64 | zcat $ref |perl -F'\\t' -wane 'if(\$_ =~ /^>/){{\$F[0] = \$F[0] =~ /^>chr/ ? 
\$F[0] : \">chr\".substr(\$F[0],1) =~ s/_/./g;chomp(\$F[0]);print \"\\n\".\$F[0].\"\\n\"}} else{{(\$line=\$_)=~s/\\r[\\n]*/\\n/gm; chomp(\$line=\$_); print \$line}}' |tail -n+2 > $fa && $BINS/Preprocessing/indexfa.sh $fa 2> log && cut -f1,2 $fai > $cs 65 | """ 66 | } -------------------------------------------------------------------------------- /scripts/Universal/countCCA.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | use warnings; 4 | use PerlIO::gzip; 5 | 6 | my $in = shift; #bam file 7 | my $in2 = shift; #cluster sequences 8 | 9 | 10 | open BAM, "samtools view $in |"; 11 | open SEQ, "<:gzip(autopop)", "$in2" or die "can t open $in2\n"; 12 | 13 | 14 | my %hash =(); 15 | my $c1 =0; 16 | my $c2 =0; 17 | my $c3 =0; 18 | my $c4 =0; 19 | my $c5 = 0; 20 | 21 | 22 | while(){ 23 | chomp $_; 24 | 25 | if($_ =~m /^>(.*)/){ 26 | my $id = $1; 27 | my $seq = ; 28 | chomp $seq; 29 | $hash{$id} = $seq; 30 | } 31 | } 32 | 33 | my $all; 34 | while(){ 35 | $all++; 36 | chomp $_; 37 | 38 | my @line = split(/\t/,$_); 39 | 40 | my $cluster = $line[2]; #Reference sequence NAME 41 | my $start = $line[3]; #1-based leftmost mapping POSition 42 | my $seq = $line[9]; #segment SEQuence (read) 43 | my $seqL = length($seq); 44 | 45 | my $cigar = $line[5]; 46 | my @array = split(/(M|I|D|N|S|H|P|X)/,$cigar); 47 | 48 | #sum number of deletions or sub number of insertions from the read length 49 | my $del = 0; 50 | my $ins = 0; 51 | for(my $i=0; $i < scalar @array ; $i++){ 52 | if($array[$i] eq "D"){ 53 | $del += $array[$i-1]; 54 | } 55 | if($array[$i] eq "I"){ 56 | $ins += $array[$i-1]; 57 | } 58 | } 59 | 60 | $seqL += $del; 61 | $seqL -= $ins; 62 | 63 | my $len = length($hash{$cluster}); #length of the cluster seq with CCACCA 64 | my $tailCCA = substr $seq, -3; 65 | my $tailCC = substr $seq, -2; 66 | my $tailC = substr $seq, -1; 67 | my $tailCCACCA = substr $seq, -6; 68 | 69 | ### only reads with CCA and mapping position at 3' end -3 (CCA) position 70 | if(($start -1 + $seqL) == ($len - 3) && $tailCCA eq "CCA"){ 71 | $c1++; 72 | } 73 | ### only reads with CCACCA and mapping position at 3' end position 74 | elsif(($start -1 + $seqL) == ($len) && $tailCCACCA eq "CCACCA"){ 75 | $c2++; 76 | } 77 | ### only reads with no CCA and mapping position at 3' end -6 (CCACCA) position 78 | elsif(($start -1 + $seqL) == ($len - 6)){ 79 | $c3++; 80 | } 81 | ### only reads with C and mapping position at 3' end -5 (CACCA) position 82 | elsif(($start -1 + $seqL) == ($len - 5) && $tailC eq "C"){ 83 | $c4++; 84 | } 85 | ### only reads with CC and mapping position at 3' end -5 (ACCA) position 86 | elsif(($start -1 + $seqL) == ($len - 4) && $tailCC eq "CC"){ 87 | $c5++; 88 | } 89 | } 90 | 91 | 92 | print join ("\t", "all", "CCACCA", "CCA", "CC", "C", "no"); 93 | print "\n"; 94 | print join ("\t", $all, $c2, $c1, $c5, $c4, $c3); 95 | print "\n"; 96 | 97 | -------------------------------------------------------------------------------- /workflows/bbduk.smk: -------------------------------------------------------------------------------- 1 | TRIMBIN, TRIMENV = env_bin_from_config(config,'TRIMMING') 2 | 3 | if paired == 'paired': 4 | rule bbduk_trim: 5 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 6 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if 
x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 7 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_R1_val_1.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R1_dedup_val_1.fq.gz", 8 | o2 = "TRIMMED_FASTQ/{combo}/{file}_R2_val_2.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R2_dedup_val_2.fq.gz" 9 | log: "LOGS/{combo}/{file}_trim.log" 10 | conda: ""+TRIMENV+".yaml" 11 | threads: MAXTHREAD 12 | params: odir = lambda wildcards, output:os.path.dirname(output.o1), 13 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV).get('TRIM', ""), 14 | trim=TRIMBIN 15 | shell: "{params.trim} t={threads} in1={input.r1} in2={input.r2} out1={output.o1} out2={output.o2} {params.tpara}" 16 | 17 | rule bbduk_rename: 18 | input: o1 = rules.bbduk_trim.output.o1, 19 | o2 = rules.bbduk_trim.output.o2 20 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 21 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 22 | conda: ""+TRIMENV+".yaml" 23 | threads: 1 24 | shell: "mv {input.o1} {output.r1} && mv {input.o2} {output.r2}" 25 | else: 26 | rule bbduk_trim: 27 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 28 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_dedup_trimmed.fq.gz" 29 | log: "LOGS/{combo}/{file}_trim.log" 30 | conda: ""+TRIMENV+".yaml" 31 | threads: MAXTHREAD 32 | params: odir = lambda wildcards, output: os.path.dirname(output.o1), 33 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV).get('TRIM',""), 34 | trim = TRIMBIN, 35 | shell: "{params.trim} t={threads} in={input.r1} out={output.o1} {params.tpara}" 36 | 37 | rule bbduk_rename: 38 | input: o1 = rules.bbduk_trim.output.o1 39 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 40 | conda: ""+TRIMENV+".yaml" 41 | threads: 1 42 | shell: "mv {input.o1} {output.r1}" 43 | -------------------------------------------------------------------------------- /workflows/mapping.smk: -------------------------------------------------------------------------------- 1 | rule sortsam: 2 | input: mapps = rules.mapping.output.mapped 3 | output: sortedsam = report("MAPPED/{combo}/{file}_mapped_sorted.sam.gz", category="SORTING"), 4 | tmphead = temp("MAPPED/{combo}/{file}_mapped_header.gz"), 5 | tmpfile = temp("TMP/{combo}/{file}") 6 | log: "LOGS/{combo}/{file}/sortsam.log" 7 | conda: "samtools.yaml" 8 | threads: MAXTHREAD 9 | priority: 100 10 | params: linkto = lambda wildcards, output: os.path.basename(output.sortedsam), 11 | sortmem = lambda wildcards, threads: int(30/MAXTHREAD*threads) 12 | shell: "set +o pipefail;samtools view -H {input.mapps}|grep -P '^@HD' |pigz -p {threads} -f > {output.tmphead} ; samtools view -H {input.mapps}|grep -P '^@SQ'|sort -t$'\t' -k1,1 -k2,2V |pigz -p {threads} -f >> {output.tmphead} ; samtools view -H {input.mapps}|grep -P '^@RG'|pigz -p {threads} -f >> {output.tmphead} ; samtools view -H {input.mapps}|grep -P '^@PG'|pigz -p {threads} -f >> {output.tmphead} ; export LC_ALL=C;samtools view -h {input.mapps} | grep -v \"^@\"|sort --parallel={threads} -S {params.sortmem}% -T TMP -t$'\t' -k3,3V -k4,4n - |pigz -p {threads} -f > {output.tmpfile} ; cat {output.tmphead} {output.tmpfile} > {output.sortedsam} 2> {log}"# && rm -f {input.mapps} && 
touch {input.mapps}" 13 | 14 | rule sam2bam: 15 | input: sortedsam = rules.sortsam.output.sortedsam 16 | output: bam = report("MAPPED/{combo}/{file}_mapped_sorted.bam", category="2BAM"), 17 | bamindex = "MAPPED/{combo}/{file}_mapped_sorted.bam.bai" 18 | log: "LOGS/{combo}/{file}/sam2bam.log" 19 | conda: "samtools.yaml" 20 | threads: MAXTHREAD 21 | params: bins = BINS 22 | shell: "zcat {input.sortedsam} | samtools view -bS - > {output.bam} && samtools index {output.bam} 2> {log}" 23 | 24 | rule uniqsam: 25 | input: sortedsam = rules.sortsam.output.sortedsam, 26 | bam = rules.sam2bam.output 27 | output: uniqsam = report("MAPPED/{combo}/{file}_mapped_sorted_unique.sam.gz", category="UNIQUE") 28 | log: "LOGS/{combo}/{file}/uniqsam.log" 29 | conda: "base.yaml" 30 | threads: MAXTHREAD 31 | params: bins=BINS 32 | shell: "{params.bins}/Shells/UniqueSam_woPicard.sh {input.sortedsam} {output.uniqsam} {threads} 2> {log}" 33 | 34 | rule sam2bamuniq: 35 | input: uniqsam = rules.uniqsam.output, 36 | bam = rules.sam2bam.output 37 | output: uniqbam = report("MAPPED/{combo}/{file}_mapped_sorted_unique.bam", category="2BAM"), 38 | uniqbamindex = "MAPPED/{combo}/{file}_mapped_sorted_unique.bam.bai" 39 | log: "LOGS/{combo}/{file}/sam2bamuniq.log" 40 | conda: "samtools.yaml" 41 | threads: MAXTHREAD 42 | priority: 50 43 | params: bins = BINS 44 | shell: "zcat {input.uniqsam} | samtools view -bS - > {output.uniqbam} && samtools index {output.uniqbam} 2> {log}" 45 | -------------------------------------------------------------------------------- /workflows/fastp.smk: -------------------------------------------------------------------------------- 1 | TRIMBIN, TRIMENV = env_bin_from_config(config,'TRIMMING') 2 | 3 | if paired == 'paired': 4 | rule fastp_trim: 5 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 6 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 7 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_R1_val_1.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R1_dedup_val_1.fq.gz", 8 | o2 = "TRIMMED_FASTQ/{combo}/{file}_R2_val_2.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R2_dedup_val_2.fq.gz" 9 | log: "LOGS/{combo}/{file}_trim.log" 10 | conda: ""+TRIMENV+".yaml" 11 | threads: MAXTHREAD 12 | params: odir = lambda wildcards, output:os.path.dirname(output.o1), 13 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV).get('TRIM', ""), 14 | trim=TRIMBIN 15 | shell: "{params.trim} --thread {threads} --in1 {input.r1} --in2 {input.r2} --out1 {output.o1} --out2 {output.o2} {params.tpara}" 16 | 17 | rule fastp_rename: 18 | input: o1 = rules.fastp_trim.output.o1, 19 | o2 = rules.fastp_trim.output.o2 20 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 21 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 22 | conda: ""+TRIMENV+".yaml" 23 | threads: 1 24 | shell: "mv {input.o1} {output.r1} && mv {input.o2} {output.r2}" 25 | else: 26 | rule fastp_trim: 27 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 28 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fq.gz" if not 
prededup else "TRIMMED_FASTQ/{combo}/{file}_dedup_trimmed.fq.gz" 29 | log: "LOGS/{combo}/{file}_trim.log" 30 | conda: ""+TRIMENV+".yaml" 31 | threads: MAXTHREAD 32 | params: odir = lambda wildcards, output: os.path.dirname(output.o1), 33 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV).get('TRIM',""), 34 | trim = TRIMBIN, 35 | shell: "{params.trim} --thread {threads} -i {input.r1} -o {output.o1} {params.tpara}" 36 | 37 | rule fastp_rename: 38 | input: o1 = rules.fastp_trim.output.o1 39 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 40 | conda: ""+TRIMENV+".yaml" 41 | threads: 1 42 | shell: "mv {input.o1} {output.r1}" 43 | -------------------------------------------------------------------------------- /scripts/Analysis/GOA.R: -------------------------------------------------------------------------------- 1 | #source("http://bioconductor.org/biocLite.R") 2 | #biocLite("topGO") 3 | #if (!requireNamespace("BiocManager", quietly=TRUE)) 4 | # install.packages("BiocManager") 5 | #BiocManager::install("topGO") 6 | 7 | suppressPackageStartupMessages({ 8 | require(topGO) 9 | require(Rgraphviz) 10 | }) 11 | 12 | #define notin 13 | `%notin%` = Negate(`%in%`) 14 | 15 | args <- commandArgs(TRUE) 16 | background <- args[1] 17 | test <- args[2] 18 | GOs <- args[3] 19 | 20 | #run GO 21 | geneID2GO <- readMappings(GOs, sep = "\t", IDsep = ",") 22 | expressedGenes <- read.table(background,sep="\t") 23 | Genes <- read.table(test,sep="\t") 24 | GenesOI <- Genes$V1 25 | geneList <- factor(as.integer(expressedGenes$V1 %in% GenesOI)) 26 | names(geneList) <- expressedGenes$V1 27 | 28 | GOdata <- new("topGOdata", 29 | ontology = "MF", 30 | allGenes = geneList, 31 | geneSel = GenesOI, 32 | annot = annFUN.gene2GO, # the new annotation function 33 | gene2GO = geneID2GO) ## the gene ID to GOs dataset 34 | 35 | test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") 36 | resultFisher <- getSigGroups(GOdata, test.stat) 37 | pvalFis <- score(resultFisher) 38 | resultWeight <- getSigGroups(GOdata, test.stat) 39 | pvalWeight <- score(resultWeight, whichGO = names(pvalFis)) 40 | cor(pvalFis, pvalWeight) 41 | geneData(resultWeight) 42 | allRes <- GenTable(GOdata, classic = resultFisher, weight = resultWeight, orderBy = "weight", ranksOf = "classic", topNodes = 20) 43 | write.table(allRes, file = paste("TopGO_MF",test,sep="_"), append = FALSE, quote = TRUE, sep = "\t", eol = "\n", na = "NA", dec = ".", row.names = TRUE, col.names = TRUE, qmethod = "double") 44 | printGraph(GOdata, resultFisher, firstSigNodes = 10, fn.prefix = paste("TopGO_MFGraph",test,sep="_"), useInfo = "all", pdfSW = TRUE) 45 | 46 | GOdata <- new("topGOdata", 47 | ontology = "BP", 48 | allGenes = geneList, 49 | geneSel = GenesOI, 50 | annot = annFUN.gene2GO, # the new annotation function 51 | gene2GO = geneID2GO) ## the gene ID to GOs dataset 52 | 53 | test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") 54 | resultFisher <- getSigGroups(GOdata, test.stat) 55 | pvalFis <- score(resultFisher) 56 | resultWeight <- getSigGroups(GOdata, test.stat) 57 | pvalWeight <- score(resultWeight, whichGO = names(pvalFis)) 58 | cor(pvalFis, pvalWeight) 59 | geneData(resultWeight) 60 | allRes <- GenTable(GOdata, classic = resultFisher, weight = resultWeight, orderBy = "weight", ranksOf = "classic", topNodes = 20) 61 | write.table(allRes, file = paste("TopGO_BP",test,sep="_"), append = FALSE, quote = TRUE, sep = "\t", eol = "\n", na = "NA", dec = ".", 
row.names = TRUE, col.names = TRUE, qmethod = "double") 62 | printGraph(GOdata, resultFisher, firstSigNodes = 10, fn.prefix = paste("TopGO_BPGraph",test,sep="_"), useInfo = "all", pdfSW = TRUE) 63 | 64 | 65 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath(".")) 17 | sys.path.insert(0, os.path.abspath("..")) 18 | sys.path.insert(0, os.path.abspath("../MONSDA")) 19 | 20 | from MONSDA import _version 21 | 22 | __version__ = _version.get_versions()["version"] 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = "MONSDA" 27 | copyright = "2020, Joerg Fallmann" 28 | author = "Joerg Fallmann" 29 | 30 | # The full version, including alpha/beta/rc tags 31 | release = __version__ 32 | 33 | # -- General configuration --------------------------------------------------- 34 | # Master file to be generated 35 | 36 | master_doc = "index" 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | "sphinx.ext.autodoc", 43 | "sphinx.ext.doctest", 44 | "sphinx.ext.mathjax", 45 | "sphinx.ext.napoleon", 46 | "sphinx.ext.graphviz", 47 | "recommonmark", 48 | "sphinx_rtd_theme", 49 | ] 50 | 51 | # Add any paths that contain templates here, relative to this directory. 52 | templates_path = ["source/_templates"] 53 | 54 | # List of patterns, relative to source directory, that match files and 55 | # directories to ignore when looking for source files. 56 | # This pattern also affects html_static_path and html_extra_path. 57 | exclude_patterns = [] 58 | 59 | 60 | # -- Options for HTML output ------------------------------------------------- 61 | 62 | # The theme to use for HTML and HTML Help pages. See the documentation for 63 | # a list of builtin themes. 64 | # 65 | # html_theme = 'classic' 66 | html_theme = "sphinx_rtd_theme" 67 | 68 | html_theme_options = { 69 | # Toc options 70 | "collapse_navigation": True, 71 | "sticky_navigation": True, 72 | "navigation_depth": 4, 73 | "includehidden": True, 74 | "titles_only": False, 75 | } 76 | 77 | # Add any paths that contain custom static files (such as style sheets) here, 78 | # relative to this directory. They are copied after the builtin static files, 79 | # so a file named "default.css" will overwrite the builtin "default.css". 
80 | html_static_path = ["source/_static"] 81 | pygments_style = "sphinx" 82 | 83 | 84 | # Add custom css to prevent tables with wide side-scrolling 85 | def setup(app): 86 | app.add_css_file("css/custom.css") 87 | -------------------------------------------------------------------------------- /workflows/fastqc_raw.smk: -------------------------------------------------------------------------------- 1 | QCBIN, QCENV = env_bin_from_config(config, 'QC') 2 | 3 | if paired == 'paired': 4 | log.info('Running paired mode QC') 5 | rule qc_raw: 6 | input: r1 = "FASTQ/{rawfile}_{read}.fastq.gz" 7 | output: o1 = report("QC{combo}{rawfile}_{read}_fastqc.zip") 8 | log: "LOGS{combo}{rawfile}_fastqc_{read}_raw.log" 9 | conda: ""+QCENV+".yaml" 10 | threads: MAXTHREAD 11 | params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('QC', "") 12 | shell: "OUT=$(dirname {output.o1});fastqc --quiet -o $OUT -t {threads} --noextract {params.qpara} -f fastq {input.r1} 2> {log}" 13 | 14 | rule multiqc: 15 | input: expand(rules.qc_raw.output.o1, rawfile=list(SAMPLES), read=['R1','R2'], combo=combo) 16 | output: html = report("QC/Multi{combo}{condition}/multiqc_report.html", category="QC"), 17 | tmp = temp("QC/Multi{combo}{condition}/tmp"), 18 | lst = "QC/Multi{combo}{condition}/qclist_raw.txt" 19 | log: "LOGS{combo}{condition}_multiqc_raw.log" 20 | conda: ""+QCENV+".yaml" 21 | threads: 1 22 | params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "") 23 | shell: "OUT=$(dirname {output.html}); for i in {input};do echo $(dirname \"${{i}}\") >> {output.tmp};done; cat {output.tmp} |sort -u > {output.lst};export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o $OUT -l {output.lst} 2> {log}" 24 | 25 | else: 26 | rule qc_raw: 27 | input: r1 = "FASTQ/{rawfile}.fastq.gz" 28 | output: o1 = report("QC{combo}{rawfile}_fastqc.zip", category="QC") 29 | log: "LOGS{combo}{rawfile}_fastqc_raw.log" 30 | conda: ""+QCENV+".yaml" 31 | threads: MAXTHREAD 32 | params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('QC', "") 33 | shell: "OUT=$(dirname {output.o1});fastqc --quiet -o $OUT -t {threads} --noextract {params.qpara} -f fastq {input.r1} 2> {log}" 34 | 35 | rule multiqc: 36 | input: expand(rules.qc_raw.output.o1, rawfile=list(SAMPLES), combo=combo) 37 | output: html = report("QC/Multi{combo}{condition}/multiqc_report.html", category="QC"), 38 | tmp = temp("QC/Multi{combo}{condition}/tmp"), 39 | lst = "QC/Multi{combo}{condition}/qclist_raw.txt" 40 | log: "LOGS{combo}{condition}_multiqc_raw.log" 41 | conda: ""+QCENV+".yaml" 42 | threads: 1 43 | params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "") 44 | shell: "OUT=$(dirname {output.html}); for i in {input};do echo $(dirname \"${{i}}\") >> {output.tmp};done; cat {output.tmp} |sort -u > {output.lst};export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o $OUT -l {output.lst} 2> {log}" 45 | -------------------------------------------------------------------------------- /docs/source/wrapper.rst: -------------------------------------------------------------------------------- 1 | Wrapping Workflows 2 | ================== 3 | 4 | In general **MONSDA** is *Python3* software, that wraps workflows by assembling subworkflows or single tools from `.smk` or `.nf` 
templates. The idea here is that tools and subworkflows for similar tasks are designed in a way that starts from the same input and results in the same output. This is not only true for single workflow steps which can be performed by multiple tools, but also for the wrapped workflow management systems (WMS). In principle, output generated in `Nextflow` mode should be suitable as input for `Snakemake` and vice versa. This means that, for example, mapping output generated in `Nextflow` mode can be used as input for *DE* analysis in `Snakemake` mode, while both work from the same **config.json**. 5 | 6 | As Snakemake is also written in *Python*, wrapping workflows is similar to the built-in way of submodule assembly, although we take care that submodules for the same task remain interchangeable. Wrapping Nextflow is slightly different, as `MONSDA` has to assemble *Groovy* text blocks; this makes no difference to the end user, but requires translating the configuration from the config file into Nextflow-parsable command lines. However, the idea of creating interchangeable subworkflows or tool-specific code fragments stays the same. 7 | 8 | Independently of the wrapped WMS, workflows are split internally into three independent stages. *PREPROCESSING* includes all workflow steps that generate or manipulate FASTQ files to make them available to the *PROCESSING* stage. This includes download of files from SRA, basecalling with Guppy and pre-quality-control, so all workflow steps that do not require identical input formats but lead to similar output. 9 | 10 | *PROCESSING* starts from FASTQ files and includes trimming, deduplication, mapping and quality control for all subprocesses. 11 | 12 | *POSTPROCESSING* builds upon *PROCESSING* output and includes quantification, differential expression analysis on gene, transcript and exon level, generation of tracks for UCSC or other genome browsers, peak finding and circular RNA identification. In contrast to *PROCESSING* workflows, these steps do not require output to be of similar format but are able to work from the same input. 13 | 14 | In case dedicated workflows need to be established, as is for example the case for cyPhyRNA-Seq, the main idea is to split all preprocessing and processing steps into units that remain interchangeable, and to deliver dedicated post-processing subworkflows which can work on their output. In the mentioned example we have quality control, trimming, mapping and deduplication embedded in standard workflows, and dedicated, specific postprocessing of mapped reads wrapped in the PEAKS postprocessing step. 15 | 16 | For new workflows, we aim to split those into subunits as small as possible, to make subworkflows available for other pipelines, and to add dedicated parts as postprocessing to currently established categories. In case new categories need to be defined, please contact us to discuss how this can be embedded.
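
To make the three stages more concrete, the following sketch shows how enabled workflow steps map onto them in a **config.json**; the key name, the step list and the pseudo-comment entry are illustrative assumptions here, the actual schema is described in the configuration chapter:

.. code-block:: json

    {
        "WORKFLOWS": "FETCH, QC, TRIMMING, MAPPING, DE, PEAKS",
        "_stages": "FETCH and raw QC run as PREPROCESSING, TRIMMING/MAPPING/QC as PROCESSING, DE and PEAKS as POSTPROCESSING"
    }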
17 | 18 | -------------------------------------------------------------------------------- /workflows/cutadapt.smk: -------------------------------------------------------------------------------- 1 | TRIMBIN, TRIMENV = env_bin_from_config(config,'TRIMMING') 2 | 3 | if paired == 'paired': 4 | rule cutadapt_trim: 5 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 6 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 7 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_R1_val_1.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R1_dedup_val_1.fq.gz", 8 | o2 = "TRIMMED_FASTQ/{combo}/{file}_R2_val_2.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R2_dedup_val_2.fq.gz" 9 | log: "LOGS/{combo}/{file}_trim.log" 10 | conda: ""+TRIMENV+".yaml" 11 | threads: min(int(MAXTHREAD/8),4) if min(int(MAXTHREAD/8),4) >= 1 else (4 if int(MAXTHREAD) >= 4 else 1) 12 | params: odir=lambda wildcards, output:os.path.dirname(output.o1), 13 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV)['OPTIONS'].get('TRIM', ""), 14 | trim=TRIMBIN 15 | shell: "{params.trim} {params.tpara} --cores {threads} -o {output.o1} -p {output.o2} {input.r1} {input.r2} &> {log}" 16 | 17 | rule cutadapt_rename: 18 | input: o1 = rules.cutadapt_trim.output.o1, 19 | o2 = rules.cutadapt_trim.output.o2 20 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 21 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 22 | conda: ""+TRIMENV+".yaml" 23 | threads: 1 24 | shell: "mv {input.o1} {output.r1} && mv {input.o2} {output.r2}" 25 | 26 | else: 27 | rule cutadapt_trim: 28 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 29 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_dedup_trimmed.fq.gz" 30 | log: "LOGS/{combo}/{file}_trim.log" 31 | conda: ""+TRIMENV+".yaml" 32 | threads: min(int(MAXTHREAD/8),4) if min(int(MAXTHREAD/2),4) >= 1 else (4 if int(MAXTHREAD) >= 4 else 1) 33 | params: odir=lambda wildcards, output: os.path.dirname(output.o1), 34 | tpara = lambda wildcards:tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV)['OPTIONS'].get('TRIM', ""), 35 | trim=TRIMBIN, 36 | shell: "{params.trim} {params.tpara} --cores {threads} -o {output.o1} {input.r1} > {log}" 37 | 38 | rule cutadapt_rename: 39 | input: o1 = rules.cutadapt_trim.output.o1 40 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 41 | conda: ""+TRIMENV+".yaml" 42 | threads: 1 43 | shell: "mv {input.o1} {output.r1}" 44 | -------------------------------------------------------------------------------- /workflows/minimap.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: fa = REFERENCE 10 | output: idx = INDEX, 11 
| uidx = expand("{refd}/INDICES/{mape}_{unikey}.idx", refd=REFDIR, mape=MAPPERENV, unikey=unik) 12 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 13 | conda: ""+MAPPERENV+".yaml" 14 | threads: MAXTHREAD 15 | params: indexer=MAPPERBIN, 16 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 17 | linkidx = lambda wildcards, output: str(os.path.abspath(output.uidx[0])) 18 | shell: "{params.indexer} -t {threads} -d {output.uidx} {params.ipara} {input.fa} 2> {log} && ln -s {params.linkidx} {output.idx}" 19 | 20 | if paired == 'paired': 21 | rule mapping: 22 | input: q1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 23 | q2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 24 | uidx = rules.generate_index.output.uidx[0] 25 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 26 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 27 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 28 | log: "LOGS/{combo}/{file}/mapping.log" 29 | conda: ""+MAPPERENV+".yaml" 30 | threads: MAXTHREAD 31 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 32 | mapp = MAPPERBIN 33 | shell: "{params.mapp} -t {threads} {params.mpara} {input.uidx} {input.q1} {input.q2} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} - ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 34 | 35 | else: 36 | rule mapping: 37 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 38 | uidx = rules.generate_index.output.uidx[0] 39 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 40 | unmapped = "UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 41 | log: "LOGS/{combo}/{file}/mapping.log" 42 | conda: ""+MAPPERENV+".yaml" 43 | threads: MAXTHREAD 44 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 45 | mapp = MAPPERBIN 46 | shell: "{params.mapp} -t {threads} {params.mpara} {input.uidx} {input.query} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" 47 | -------------------------------------------------------------------------------- /docs/source/first.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | First Steps 3 | ============ 4 | 5 | **MONSDA** acts a wrapper around **Snakemake** or **Nextflow** based on a user defined **config.json** file. This **config.json** holds all the information that is needed to run the jobs and will be parsed by **MONSDA** and split into independent sub-configs that can later be found in the directory **SubSnakes** or **SubFlows** respectively. Command line execution calls are stored in the directory *JOBS*, so users can manipulate those or rerun them manually as needed. By default, however, **MONSDA** will run those jobs automatically either locally, or through **Snakemake's** or **Nextflow's** integrated cluster interfaces. 
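
As a rough orientation, after a run driven by a single **config.json** the working directory will contain, next to the results, the generated sub-configs and stored calls mentioned above; the layout below is an illustrative sketch, the exact file names depend on the condition tree and the chosen workflow manager:

::

    config.json   # the single, user-maintained configuration
    SubSnakes/    # auto-generated sub-configs per condition (Snakemake mode)
    SubFlows/     # auto-generated sub-configs per condition (Nextflow mode)
    JOBS/         # stored command line calls, can be edited and rerun manually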
6 | 7 | To successfully run an analysis pipeline, a few steps have to be followed: 8 | * Install MONSDA either via **bioconda** or **pip** following the instructions in :ref:`install` 9 | * Directory structure: The structure for the directories is dictated by :ref:`condition-tree` in the config file 10 | * Config file: This is the central part of a **MONSDA** run. Depending on :ref:`config-file` **MONSDA** will determine processing steps and generate corresponding config and workflow files to run each subworkflow until all processing steps are done. 11 | 12 | 13 | In general it is necessary to write a configuration file containing 14 | information on paths, files to process and settings beyond default for 15 | mapping tools and others. The template on which the analysis is based can 16 | be found in the **config** directory and will be explained in detail later. 17 | 18 | To create a working environment for this repository please install the 19 | **MONSDA.yaml** environment (if not installed via **bioconda**) as found in the **envs** directory 20 | like so: 21 | 22 | :: 23 | 24 | conda env create -n monsda -f envs/MONSDA.yaml 25 | 26 | The **envs** directory holds all the environments needed to run the pipelines in the **workflows** directory; 27 | these will be installed automatically when needed. 28 | 29 | For fast resolution of conda packages, we recommend conda-libmamba-solver_, a new solver for the conda package manager that speeds up conda without the need to install mamba and is shipped with **MONSDA**. However, the user is free to use mamba_, which is currently also the standard conda frontend for Snakemake_. 30 | 31 | .. _mamba: https://mamba.readthedocs.io/en/latest/ 32 | .. _conda-libmamba-solver: https://github.com/conda-incubator/conda-libmamba-solver 33 | 34 | For distribution of jobs one can either rely on local hardware, use 35 | scheduling software like 36 | Slurm_ or the SGE_, 37 | or follow any other integration in 38 | Snakemake_ or Nextflow_, 39 | but be aware that most of these have not been tested for this 40 | repository and usually require additional system-dependent setup and 41 | configuration. 42 | 43 | .. _Slurm: https://slurm.schedmd.com/documentation.html 44 | .. _SGE: https://docs.oracle.com/cd/E19957-01/820-0699/chp1-1/index.html 45 | .. _Snakemake: https://Snakemake.readthedocs.io/en/stable/executing/cluster-cloud.html 46 | .. _Nextflow: https://www.Nextflow.io/docs/latest/awscloud.html#aws-batch 47 | 48 | This manual will only show examples for local and SLURM usage.
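
As a minimal sketch of the SLURM case, a jobscript wrapping a **MONSDA** run could look like the following; the resource values are arbitrary assumptions, and the actual **MONSDA** invocation and cluster integration are described in the execution and cluster sections:

::

    #!/bin/bash
    #SBATCH --job-name=monsda
    #SBATCH --cpus-per-task=8
    #SBATCH --mem=16G
    # activate the environment created above
    conda activate monsda
    # placeholder: add the MONSDA call as described in the execution section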
49 | -------------------------------------------------------------------------------- /workflows/segemehl.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: fa = REFERENCE 10 | output: idx = INDEX, 11 | uidx = expand("{refd}/INDICES/{mape}_{unikey}.idx", refd=REFDIR, mape=MAPPERENV, unikey=unik) 12 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 13 | conda: ""+MAPPERENV+".yaml" 14 | threads: MAXTHREAD 15 | params: indexer = MAPPERBIN, 16 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 17 | linkidx = lambda wildcards, output: str(os.path.abspath(output.uidx[0])) 18 | shell: "{params.indexer} --threads {threads} {params.ipara} -d {input.fa} -x {output.uidx} &> {log} && ln -fs {params.linkidx} {output.idx}" 19 | 20 | if paired == 'paired': 21 | rule mapping: 22 | input: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 23 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 24 | uidx = rules.generate_index.output.uidx[0], 25 | fa = REFERENCE 26 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 27 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 28 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 29 | log: "LOGS/{combo}/{file}/mapping.log" 30 | conda: ""+MAPPERENV+".yaml" 31 | threads: MAXTHREAD 32 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 33 | mapp=MAPPERBIN 34 | shell: "{params.mapp} {params.mpara} -d {input.fa} -i {input.uidx} -q {input.r1} -p {input.r2} --threads {threads} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} - ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 35 | 36 | else: 37 | rule mapping: 38 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 39 | uidx = rules.generate_index.output.uidx[0], 40 | fa = REFERENCE 41 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 42 | unmapped = "UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 43 | log: "LOGS/{combo}/{file}/mapping.log" 44 | conda: ""+MAPPERENV+".yaml" 45 | threads: MAXTHREAD 46 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 47 | mapp=MAPPERBIN 48 | shell: "{params.mapp} {params.mpara} -d {input.fa} -i {input.uidx} -q {input.query} --threads {threads} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" 49 | -------------------------------------------------------------------------------- /docs/source/conditiontree.rst: -------------------------------------------------------------------------------- 1 | .. 
_condition-tree: 2 | 3 | The Condition-Tree 4 | ================== 5 | 6 | A key concept behind **MONSDA** is that config files are split according to conditions defined in the **config** file and each subworkflow is then run consecutively. This separates workflows into independent subworkflows, each potentially running on different input, with differing options and executables, without danger of interfering with each other. 7 | 8 | As described in :ref:`preparation`, you should have an idea of what to analyze and how to split up your conditions if needed. For each ID you work on, you can define no, one or multiple conditions and settings that will be used for the analysis. The condition-tree also defines the directory structure to follow for Input and Output directories. We assume a general tree to look something like 9 | 10 | .. code-block:: json 11 | 12 | 'ID' -> 'CONDITION' -> 'SETTING' 13 | 14 | Here *ID* is the first level and the optional *Condition* the second. *Setting* is used by **MONSDA** to enable processing of the same samples under different settings or command-line options for e.g. mapping tools, trimming tools and later also postprocessing tools. **MONSDA** will also build an output directory based on the combination of tools used, e.g. fastqc-cutadapt-star-umitools, to indicate which combination of tools was used to generate the output and to prevent results from being mixed or overwritten. 15 | 16 | As an example, say I want to analyze samples retrieved from LabA on 01012020 (yes, that happens) with the mapping tools star and hisat; my condition-tree would look like **LabA:01012020** and my FASTQ input directory would follow it as **FASTQ/LabA/01012020**. The '01012020' directory would thereby contain all the fastq.gz files I need for analysis as stated in the corresponding config file. As we assume that settings may change but the input files will stay the same, **MONSDA** will search one level above the deepest level of the condition-tree. This means, if you have a tree like: 17 | 18 | .. code-block:: json 19 | 20 | 'ID1' -> 'CONDITION1' -> 'SETTING1', 'SETTING2', 'SETTING3' 21 | 22 | You do not need to copy input from **FASTQ/LabA/01012020** to **FASTQ/LabA/01012020/SETTING1/2/3**; instead, **MONSDA** will find the input in **FASTQ/LabA/01012020** and generate output directories which contain the *Setting* level. This of course also works if you want to analyze samples from different dates and the same lab with the same settings, from different labs, and so on. 23 | 24 | **MONSDA** will automatically define a unique *tool-key* based on the currently enabled workflow steps and the combination of tools defined in the config file. From that information it will generate output folders like **MAPPING/LabA/01012020/star** and **MAPPING/LabA/01012020/hisat** if no other tools and workflow steps were configured to be used. 25 | 26 | Optionally, a user can also run one or the other tool with different settings, for example to benchmark tools. Define for example **map_stringent** and **map_relaxed** and indicate this on the *MAPPING* level in the config file. FASTQ input will still be found in **FASTQ/LabA/01012020**, while output files will appear in **MAPPING/LabA/01012020/map_stringent/star** and **MAPPING/LabA/01012020/map_stringent/hisat** or **MAPPING/LabA/01012020/map_relaxed/star** and **MAPPING/LabA/01012020/map_relaxed/hisat**, respectively.
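
Written out as (pseudo) JSON, the condition-tree for this example nests the two settings below the ID and condition level; the per-setting objects are left empty here and would hold the stringent or relaxed options for star and hisat, while the surrounding keys of the real config file are omitted and described in the config chapter:

.. code-block:: json

    {
        "LabA": {
            "01012020": {
                "map_stringent": {},
                "map_relaxed": {}
            }
        }
    }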
27 | -------------------------------------------------------------------------------- /scripts/Analysis/AddStructure.py: -------------------------------------------------------------------------------- 1 | #/usr/bin/env python3 2 | # AddStructure.py --- 3 | # 4 | # Filename: AddStructure.py 5 | # Description: 6 | # Author: Joerg Fallmann 7 | # Maintainer: 8 | # Created: Tue Sep 10 18:00:42 2019 (+0200) 9 | # Version: 10 | # Package-Requires: () 11 | # Last-Updated: Wed Sep 11 09:10:01 2019 (+0200) 12 | # By: Joerg Fallmann 13 | # Update #: 33 14 | # URL: 15 | # Doc URL: 16 | # Keywords: 17 | # Compatibility: 18 | # 19 | # 20 | 21 | # Commentary: 22 | # 23 | # 24 | # 25 | # 26 | 27 | # Change Log: 28 | # 29 | # 30 | # 31 | # 32 | # This program is free software: you can redistribute it and/or modify 33 | # it under the terms of the GNU General Public License as published by 34 | # the Free Software Foundation, either version 3 of the License, or (at 35 | # your option) any later version. 36 | # 37 | # This program is distributed in the hope that it will be useful, but 38 | # WITHOUT ANY WARRANTY; without even the implied warranty of 39 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 40 | # General Public License for more details. 41 | # 42 | # You should have received a copy of the GNU General Public License 43 | # along with GNU Emacs. If not, see . 44 | # 45 | # 46 | 47 | # Code: 48 | ###Imports 49 | import sys,os 50 | import argparse 51 | import traceback as tb 52 | import gzip 53 | import RNA 54 | 55 | ###Arguments 56 | def parseargs(): 57 | parser = argparse.ArgumentParser(description='Add structure to (bed) file containing sequence') 58 | parser.add_argument("-f", "--field", type=int, default=0, help='Which field contains the sequence, default is last field (0)') 59 | parser.add_argument("-b", "--bed", type=str, help='Bed or other tab separated file containing the sequence') 60 | 61 | return parser.parse_args() 62 | 63 | ###CODE 64 | 65 | def addseq(field, bed): 66 | try: 67 | entries = parse_bed(bed) 68 | for line in entries: 69 | sequence = line.rstrip().split('\t')[field-1].upper() 70 | # create model details 71 | md = RNA.md() 72 | # create new fold_compound object 73 | fc = RNA.fold_compound(sequence, md) 74 | # compute minimum free energy (mfe) and corresponding structure 75 | (ss, mfe) = fc.mfe() 76 | 77 | sys.stdout.write('\t'.join([line.strip(),ss])+'\n') 78 | 79 | except Exception as err: 80 | exc_type, exc_value, exc_tb = sys.exc_info() 81 | tbe = tb.TracebackException( 82 | exc_type, exc_value, exc_tb, 83 | ) 84 | with open('error','a') as h: 85 | print(''.join(tbe.format()), file=h) 86 | 87 | def parse_bed(bed, annotated=None): 88 | try: 89 | if os.path.isfile(os.path.abspath(bed)): 90 | if '.gz' in bed: 91 | return gzip.open(bed,'rt') 92 | else: 93 | return open(bed,'rt') 94 | 95 | except Exception as err: 96 | exc_type, exc_value, exc_tb = sys.exc_info() 97 | tbe = tb.TracebackException( 98 | exc_type, exc_value, exc_tb, 99 | ) 100 | with open('error','a') as h: 101 | print(''.join(tbe.format()), file=h) 102 | 103 | 104 | ###MAIN 105 | if __name__ == '__main__': 106 | args=parseargs() 107 | addseq(args.field, args.bed) 108 | 109 | # 110 | # AddStructure.py ends here 111 | -------------------------------------------------------------------------------- /workflows/bwameth.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = 
sub_dict(tool_params(SAMPLES[0], None, config, "MAPPING", MAPPERENV)["OPTIONS"],["INDEX"],) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: ref = REFERENCE 10 | output: idx = directory(INDEX), 11 | uidx = expand("{refd}/INDICES/{mape}_{unikey}/{pref}", refd=REFDIR, mape=MAPPERENV, unikey=unik, pref=PREFIX+os.path.basename(REFERENCE).replace(".gz", "")) 12 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 13 | conda: ""+MAPPERENV+".yaml" 14 | threads: 1 15 | params: indexer = 'bwameth.py index-mem2', 16 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 17 | linkidx = lambda wildcards, output: str(os.path.abspath(str.join(os.sep, str(output.uidx[0]).split(os.sep)[:-1]))) if PREFIX != '' else str(os.path.abspath(str(output.uidx[0]))), 18 | shell: "if [[ -f \"{output.uidx}\" ]]; then ln -fs {params.linkidx} {output.idx} && touch {output.uidx} && echo \"Found bwa index, continue with mapping\";else zcat {input.ref} > {output.uidx} && {params.indexer} {output.uidx} {params.ipara} 2> {log} && ln -fs {params.linkidx} {output.idx};fi" 19 | 20 | if paired == 'paired': 21 | rule mapping: 22 | input: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 23 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 24 | uidx = rules.generate_index.output.uidx[0] 25 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 26 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 27 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 28 | log: "LOGS/{combo}/{file}/bwameth.log" 29 | conda: ""+MAPPERENV+".yaml" 30 | threads: MAXTHREAD 31 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get("MAP", ""), 32 | mapp = MAPPERBIN 33 | shell: "{params.mapp} {params.mpara} --threads {threads} --reference {input.uidx} {input.r1} {input.r2} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 34 | else: 35 | rule mapping: 36 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 37 | uidx = rules.generate_index.output.uidx[0], 38 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 39 | unmapped = "UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 40 | log: "LOGS/{combo}/{file}/mapping.log" 41 | conda: ""+MAPPERENV+".yaml" 42 | threads: MAXTHREAD 43 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 44 | mapp = MAPPERBIN 45 | shell: "{params.mapp} {params.mpara} --threads {threads} --reference {input.uidx} {input.query} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" -------------------------------------------------------------------------------- /workflows/fastqc_dedup_trim.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | //QC RAW 6 | process qc_raw{ 7 | conda "$QCENV"+".yaml" 8 | cpus THREADS 
9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 15 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 16 | else null 17 | } 18 | 19 | input: 20 | path read 21 | 22 | output: 23 | path "*.{zip,html}", emit: fastqc_results 24 | 25 | script: 26 | """ 27 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 28 | """ 29 | } 30 | 31 | workflow QC_RAW{ 32 | take: collection 33 | 34 | main: 35 | //SAMPLE CHANNELS 36 | if (PAIRED == 'paired'){ 37 | SAMPLES = SAMPLES.collect{ 38 | element -> return "${workflow.workDir}/../FASTQ/"+element+"_{R2,R1}.*fastq.gz" 39 | } 40 | }else{ 41 | SAMPLES=SAMPLES.collect{ 42 | element -> return "${workflow.workDir}/../FASTQ/"+element+".*fastq.gz" 43 | } 44 | } 45 | 46 | samples_ch = Channel.fromPath(SAMPLES.sort()) 47 | 48 | qc_raw(samples_ch.collect()) 49 | 50 | emit: 51 | qc = qc_raw.out.fastqc_results 52 | } 53 | 54 | 55 | //QC TRIM 56 | 57 | process qc_trimmed{ 58 | conda "$QCENV"+".yaml" 59 | cpus THREADS 60 | cache 'lenient' 61 | //validExitStatus 0,1 62 | 63 | publishDir "${workflow.workDir}/../" , mode: 'link', 64 | saveAs: {filename -> 65 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 66 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 67 | else null 68 | } 69 | 70 | input: 71 | //val collect 72 | path read 73 | 74 | output: 75 | path "*.{zip,html}", emit: fastqc_results 76 | 77 | script: 78 | """ 79 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 80 | """ 81 | } 82 | 83 | workflow QC_TRIMMING{ 84 | take: collection 85 | 86 | main: 87 | 88 | qc_trimmed(collection.collect()) 89 | 90 | emit: 91 | qc = qc_trimmed.out.fastqc_results 92 | } 93 | 94 | // DEDUP QC 95 | 96 | process qc_dedup{ 97 | conda "$QCENV"+".yaml" 98 | cpus THREADS 99 | cache 'lenient' 100 | //validExitStatus 0,1 101 | 102 | publishDir "${workflow.workDir}/../" , mode: 'link', 103 | saveAs: {filename -> 104 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 105 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 106 | else null 107 | } 108 | 109 | input: 110 | path read 111 | 112 | output: 113 | path "*.{zip,html}", emit: fastqc_results 114 | 115 | script: 116 | """ 117 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 118 | """ 119 | } 120 | 121 | workflow QC_DEDUP{ 122 | take: collection 123 | 124 | main: 125 | 126 | qc_dedup(collection.collect()) 127 | 128 | emit: 129 | qc = qc_dedup.out.fastqc_results 130 | } 131 | -------------------------------------------------------------------------------- /workflows/trimgalore.smk: -------------------------------------------------------------------------------- 1 | TRIMBIN, TRIMENV = env_bin_from_config(config,'TRIMMING') 2 | #outdir = 'TRIMMED_FASTQ' 3 | 4 | #wildcard_constraints: 5 | # file = '|'.join(list(samplecond(SAMPLES, config))), 6 | # read = "R1|R2" 7 | # outdir = outdir 8 | 9 | #rule trimthemall: 10 | # input: expand("{outdir}{file}_{read}_trimmed.fastq.gz", outdir=outdir, file=samplecond(SAMPLES, config), read=["R1","R2"]) if paired == \'paired\' else expand("{outdir}{file}_trimmed.fastq.gz", outdir=outdir, 
file=samplecond(SAMPLES, config)) 11 | 12 | if paired == 'paired': 13 | rule trimgalore_trim: 14 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 15 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 16 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_R1_val_1.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R1_dedup_val_1.fq.gz", 17 | o2 = "TRIMMED_FASTQ/{combo}/{file}_R2_val_2.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R2_dedup_val_2.fq.gz" 18 | log: "LOGS/{combo}/{file}_trim.log" 19 | conda: ""+TRIMENV+".yaml" 20 | threads: min(int(MAXTHREAD/2),4) if min(int(MAXTHREAD/2),4) >= 1 else (4 if int(MAXTHREAD) >= 4 else 1) 21 | params: odir=lambda wildcards, output:os.path.dirname(output.o1), 22 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV)['OPTIONS'].get('TRIM', ""), 23 | trim=TRIMBIN 24 | shell: "{params.trim} --cores {threads} --paired --gzip {params.tpara} -o {params.odir} {input.r1} {input.r2} &> {log}" 25 | 26 | rule trimgalore_rename: 27 | input: o1 = rules.trimgalore_trim.output.o1, 28 | o2 = rules.trimgalore_trim.output.o2 29 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 30 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 31 | conda: ""+TRIMENV+".yaml" 32 | threads: 1 33 | shell: "mv {input.o1} {output.r1} && mv {input.o2} {output.r2}" 34 | 35 | else: 36 | rule trimgalore_trim: 37 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 38 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_dedup_trimmed.fq.gz" 39 | log: "LOGS/{combo}/{file}_trim.log" 40 | conda: ""+TRIMENV+".yaml" 41 | threads: min(int(MAXTHREAD/2),4) if min(int(MAXTHREAD/2),4) >= 1 else (4 if int(MAXTHREAD) >= 4 else 1) 42 | params: odir = lambda wildcards, output: os.path.dirname(output.o1), 43 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV)['OPTIONS'].get('TRIM', ""), 44 | trim=TRIMBIN 45 | shell: "{params.trim} --cores {threads} --gzip {params.tpara} -o {params.odir} {input.r1} &> {log}" 46 | 47 | rule trimgalore_rename: 48 | input: o1 = rules.trimgalore_trim.output.o1 49 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 50 | conda: ""+TRIMENV+".yaml" 51 | threads: 1 52 | shell: "mv {input.o1} {output.r1}" 53 | -------------------------------------------------------------------------------- /envs/drimseq_DTU.yaml: -------------------------------------------------------------------------------- 1 | name: drimseq_DTU 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - bioconductor-annotationdbi =1.52.0 8 | - bioconductor-biobase =2.50.0 9 | - bioconductor-biocfilecache =1.14.0 10 | - bioconductor-biocgenerics =0.36.0 11 | - bioconductor-biocparallel =1.24.0 12 | - bioconductor-biomart =2.46.0 13 | - bioconductor-biostrings =2.58.0 14 | - bioconductor-delayedarray =0.16.0 15 | - bioconductor-drimseq =1.18.0 16 | - bioconductor-edger =3.32.0 17 | - bioconductor-genomeinfodb =1.26.0 18 | - bioconductor-genomeinfodbdata 
=1.2.4 19 | - bioconductor-genomicalignments =1.26.0 20 | - bioconductor-genomicfeatures =1.42.0 21 | - bioconductor-genomicranges =1.42.0 22 | - bioconductor-iranges =2.24.0 23 | - bioconductor-limma =3.46.0 24 | - bioconductor-matrixgenerics =1.2.0 25 | - bioconductor-rhtslib =1.22.0 26 | - bioconductor-rsamtools =2.6.0 27 | - bioconductor-rtracklayer =1.50.0 28 | - bioconductor-s4vectors =0.28.0 29 | - bioconductor-summarizedexperiment =1.20.0 30 | - bioconductor-tximport =1.18.0 31 | - bioconductor-xvector =0.30.0 32 | - bioconductor-zlibbioc =1.36.0 33 | - r-askpass =1.1 34 | - r-assertthat =0.2.1 35 | - r-backports =1.2.1 36 | - r-base =4.0.3 37 | - r-bh =1.75.0_0 38 | - r-bit =4.0.4 39 | - r-bit64 =4.0.5 40 | - r-bitops =1.0_6 41 | - r-blob =1.2.1 42 | - r-brio =1.1.1 43 | - r-cachem =1.0.4 44 | - r-callr =3.5.1 45 | - r-cli =2.3.0 46 | - r-colorspace =2.0_0 47 | - r-crayon =1.4.1 48 | - r-curl =4.3 49 | - r-dbi =1.1.1 50 | - r-dbplyr =2.1.0 51 | - r-desc =1.2.0 52 | - r-diffobj =0.3.3 53 | - r-digest =0.6.27 54 | - r-dplyr =1.0.4 55 | - r-ellipsis =0.3.1 56 | - r-evaluate =0.14 57 | - r-fansi =0.4.2 58 | - r-farver =2.0.3 59 | - r-fastmap =1.1.0 60 | - r-formatr =1.7 61 | - r-futile.logger =1.4.3 62 | - r-futile.options =1.0.1 63 | - r-generics =0.1.0 64 | - r-ggplot2 =3.3.3 65 | - r-glue =1.4.2 66 | - r-gtable =0.3.0 67 | - r-hms =1.0.0 68 | - r-httr =1.4.2 69 | - r-isoband =0.2.3 70 | - r-jsonlite =1.7.2 71 | - r-labeling =0.4.2 72 | - r-lambda.r =1.2.4 73 | - r-lattice =0.20_41 74 | - r-lifecycle =1.0.0 75 | - r-locfit =1.5_9.4 76 | - r-magrittr =2.0.1 77 | - r-mass =7.3_53.1 78 | - r-matrix =1.3_2 79 | - r-matrixstats =0.58.0 80 | - r-memoise =2.0.0 81 | - r-mgcv =1.8_34 82 | - r-mime =0.10 83 | - r-munsell =0.5.0 84 | - r-nlme =3.1_152 85 | - r-openssl =1.4.3 86 | - r-pillar =1.4.7 87 | - r-pkgbuild =1.2.0 88 | - r-pkgconfig =2.0.3 89 | - r-pkgload =1.1.0 90 | - r-plogr =0.2.0 91 | - r-plyr =1.8.6 92 | - r-praise =1.0.0 93 | - r-prettyunits =1.1.1 94 | - r-processx =3.4.5 95 | - r-progress =1.2.2 96 | - r-ps =1.5.0 97 | - r-purrr =0.3.4 98 | - r-r6 =2.5.0 99 | - r-rappdirs =0.3.3 100 | - r-rcolorbrewer =1.1_2 101 | - r-rcpp =1.0.6 102 | - r-rcurl =1.98_1.2 103 | - r-rematch2 =2.1.2 104 | - r-reshape2 =1.4.4 105 | - r-rlang =0.4.10 106 | - r-rprojroot =2.0.2 107 | - r-rsqlite =2.2.3 108 | - r-rstudioapi =0.13 109 | - r-scales =1.1.1 110 | - r-snow =0.4_3 111 | - r-stringi =1.5.3 112 | - r-stringr =1.4.0 113 | - r-sys =3.4 114 | - r-testthat =3.0.2 115 | - r-tibble =3.0.6 116 | - r-tidyselect =1.1.0 117 | - r-utf8 =1.1.4 118 | - r-vctrs =0.3.6 119 | - r-viridislite =0.3.0 120 | - r-waldo =0.2.4 121 | - r-withr =2.4.1 122 | - r-xml =3.99_0.5 123 | - r-xml2 =1.3.2 124 | - r-zeallot =0.1.0 125 | - readline =8.2 126 | - sed =4.8 -------------------------------------------------------------------------------- /workflows/bwa2.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, "MAPPING", MAPPERENV)["OPTIONS"],["INDEX"],) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: ref = REFERENCE 10 | output: idx = directory(INDEX), 11 | uidx = expand("{refd}/INDICES/{mape}_{unikey}/{pref}", refd=REFDIR, mape=MAPPERENV, unikey=unik, pref=PREFIX) 12 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 13 | 
conda: ""+MAPPERENV+".yaml" 14 | threads: 1 15 | params: indexer = MAPPERBIN.split(' ')[0], 16 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 17 | linkidx = lambda wildcards, output: str(os.path.abspath(str.join(os.sep, str(output.uidx[0]).split(os.sep)[:-1]))) if PREFIX != '' else str(os.path.abspath(str(output.uidx[0]))), 18 | tolink = lambda wildcards, output: str(os.path.abspath(str.join(os.sep, str(output.idx).split(os.sep)[:-1]))) 19 | shell: "if [[ -f \"{output.uidx}\/*\" ]]; then ln -fs {params.linkidx} {output.idx} && touch {output.uidx} && echo \"Found bwa index, continue with mapping\" ; else {params.indexer} index -p {output.uidx} {params.ipara} {input.ref} 2> {log} && ln -fs {params.linkidx} {output.idx} && touch {output.uidx};fi" 20 | 21 | if paired == 'paired': 22 | rule mapping: 23 | input: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 24 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 25 | index = rules.generate_index.output.uidx, 26 | ref = REFERENCE 27 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 28 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 29 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 30 | log: "LOGS/{combo}/{file}/mapping.log" 31 | conda: ""+MAPPERENV+".yaml" 32 | threads: MAXTHREAD 33 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get("MAP", ""), 34 | mapp = MAPPERBIN 35 | #idx = lambda wildcards, input: str.join(os.sep,[str(input.index), PREFIX]) if PREFIX != '' else input.index 36 | shell: "{params.mapp} mem {params.mpara} -t {threads} {input.index} {input.r1} {input.r2} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 37 | 38 | else: 39 | rule mapping: 40 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 41 | uidx = rules.generate_index.output.uidx[0], 42 | ref = REFERENCE 43 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 44 | unmapped = "UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 45 | log: "LOGS/{combo}/{file}/mapping.log" 46 | conda: ""+MAPPERENV+".yaml" 47 | threads: MAXTHREAD 48 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 49 | mapp = MAPPERBIN 50 | #idx = lambda wildcards, input: str.join(os.sep,[str(input.index), PREFIX]) if PREFIX != '' else input.index 51 | shell: "{params.mapp} mem {params.mpara} -t {threads} {input.uidx} {input.query} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" -------------------------------------------------------------------------------- /workflows/salmon.smk: -------------------------------------------------------------------------------- 1 | COUNTBIN, COUNTENV = env_bin_from_config(config,'COUNTING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'COUNTING', COUNTENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = COUNTENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule themall: 9 | input: expand("COUNTS/{combo}/{file}_counts.sf.gz", 
combo=combo, file=samplecond(SAMPLES, config)) 10 | 11 | rule salmon_index: 12 | input: fa = REFERENCE 13 | output: idx = directory(INDEX), 14 | uidx = directory(expand("{refd}/INDICES/{mape}_{unikey}", refd=REFDIR, mape=COUNTENV, unikey=unik)) 15 | log: expand("LOGS/{sets}/{cape}.idx.log", sets=SETS, cape=COUNTENV) 16 | conda: ""+COUNTENV+".yaml" 17 | threads: MAXTHREAD 18 | params: mapp = COUNTBIN, 19 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('INDEX', ""), 20 | decoy = f"-d {os.path.abspath(DECOY)}" if DECOY else '', 21 | linkidx = lambda wildcards, output: str(os.path.abspath(output.uidx[0])) 22 | shell: "set +euo pipefail; {params.mapp} index {params.ipara} {params.decoy} -p {threads} -t {input.fa} -i {output.uidx} &>> {log} && ln -fs {params.linkidx} {output.idx}" 23 | 24 | 25 | if paired == 'paired': 26 | rule mapping: 27 | input: r1 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_R1_trimmed.fastq.gz", scombo=scombo), 28 | r2 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_R2_trimmed.fastq.gz", scombo=scombo), 29 | uidx = rules.salmon_index.output.uidx[0] 30 | output: cnts = report("COUNTS/{combo}/{file}_counts.sf.gz", category="COUNTING"), 31 | ctsdir = report(directory("COUNTS/{combo}/{file}"), category="COUNTING") 32 | log: "LOGS/{combo}/{file}/salmonquant.log" 33 | conda: ""+COUNTENV+".yaml" 34 | threads: MAXTHREAD 35 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('COUNT', ""), 36 | mapp=COUNTBIN, 37 | stranded = lambda x: '-l ISF' if (stranded == 'fr' or stranded == 'ISF') else '-l ISR' if (stranded == 'rf' or stranded == 'ISR') else '-l IU', 38 | linksf = lambda wildcards, output: str(os.path.abspath(output.ctsdir)) 39 | shell: "set +euo pipefail; {params.mapp} quant -p {threads} -i {input.uidx} {params.stranded} {params.cpara} -o {output.ctsdir} -1 {input.r1} -2 {input.r2} &>> {log} && gzip {output.ctsdir}/quant.sf && ln -fs {params.linksf}/quant.sf.gz {output.cnts} &>> {log}" 40 | 41 | else: 42 | rule mapping: 43 | input: r1 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_trimmed.fastq.gz", scombo=scombo), 44 | uidx = rules.salmon_index.output.uidx[0] 45 | output: cnts = report("COUNTS/{combo}/{file}_counts.sf.gz", category="COUNTING"), 46 | ctsdir = report(directory("COUNTS/{combo}/{file}"), category="COUNTING") 47 | log: "LOGS/{combo}/{file}/salmonquant.log" 48 | conda: ""+COUNTENV+".yaml" 49 | threads: MAXTHREAD 50 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('COUNT', ""), 51 | mapp=COUNTBIN, 52 | stranded = lambda x: '-l SF' if (stranded == 'fr' or stranded == 'SF') else '-l SR' if (stranded == 'rf' or stranded == 'SR') else '-l U', 53 | linksf = lambda wildcards, output: str(os.path.abspath(output.ctsdir)) 54 | shell: "set +euo pipefail; {params.mapp} quant -p {threads} -i {input.uidx} {params.stranded} {params.cpara} -o {output.ctsdir} -r {input.r1} &>> {log} && gzip {output.ctsdir}/quant.sf ; ln -fs {params.linksf}/quant.sf.gz {output.cnts} &>> {log}" 55 | -------------------------------------------------------------------------------- /workflows/kallisto.smk: -------------------------------------------------------------------------------- 1 | COUNTBIN, COUNTENV = env_bin_from_config(config,'COUNTING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'COUNTING', COUNTENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] 
= COUNTENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule themall: 9 | input: expand("COUNTS/{combo}/{file}_counts.gz", combo=combo, file=samplecond(SAMPLES, config)) 10 | 11 | rule kallisto_index: 12 | input: fa = REFERENCE 13 | output: idx = INDEX, 14 | uidx = expand("{refd}/INDICES/{mape}_{unikey}.idx", refd=REFDIR, mape=COUNTENV, unikey=unik) 15 | log: expand("LOGS/{sets}/{cape}.idx.log", sets=SETS, cape=COUNTENV) 16 | conda: ""+COUNTENV+".yaml" 17 | threads: MAXTHREAD 18 | params: mapp = COUNTBIN, 19 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('INDEX', ""), 20 | decoy = f"-d {os.path.abspath(DECOY)}" if DECOY else '', 21 | linkidx = lambda wildcards, output: str(os.path.abspath(output.uidx[0])) 22 | shell: "set +euo pipefail; {params.mapp} index {params.ipara} {params.decoy} -t {threads} -i {output.uidx} {input.fa} &>> {log} && ln -fs {params.linkidx} {output.idx}" 23 | 24 | 25 | if paired == 'paired': 26 | rule mapping: 27 | input: r1 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_R1_trimmed.fastq.gz", scombo=scombo), 28 | r2 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_R2_trimmed.fastq.gz", scombo=scombo), 29 | uidx = rules.kallisto_index.output.uidx[0] 30 | output: cnts = report("COUNTS/{combo}/{file}_counts.gz", category="COUNTING"), 31 | ctsdir = report(directory("COUNTS/{combo}/{file}"), category="COUNTING") 32 | log: "LOGS/{combo}/{file}/kallistoquant.log" 33 | conda: ""+COUNTENV+".yaml" 34 | threads: MAXTHREAD 35 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('COUNT', ""), 36 | mapp=COUNTBIN, 37 | stranded = lambda x: '--fr-stranded' if (stranded == 'fr' or stranded == 'ISF') else '--rf-stranded' if (stranded == 'rf' or stranded == 'ISR') else '', 38 | linksf = lambda wildcards, output: str(os.path.abspath(output.ctsdir)) 39 | shell: "set +euo pipefail; {params.mapp} quant -t {threads} -i {input.uidx} {params.stranded} {params.cpara} -o {output.ctsdir} {input.r1} {input.r2} &>> {log} && gzip {output.ctsdir}/abundance.tsv && ln -fs {params.linksf}/abundance.tsv.gz {output.cnts} &>> {log}" 40 | 41 | else: 42 | rule mapping: 43 | input: r1 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_trimmed.fastq.gz", scombo=scombo), 44 | uidx = rules.kallisto_index.output.uidx[0] 45 | output: cnts = report("COUNTS/{combo}/{file}_counts.gz", category="COUNTING"), 46 | ctsdir = report(directory("COUNTS/{combo}/{file}"), category="COUNTING") 47 | log: "LOGS/{combo}/{file}/kallistoquant.log" 48 | conda: ""+COUNTENV+".yaml" 49 | threads: MAXTHREAD 50 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('COUNT', ""), 51 | mapp=COUNTBIN, 52 | stranded = lambda x: '--fr-stranded' if (stranded == 'fr' or stranded == 'ISF') else '--rf-stranded' if (stranded == 'rf' or stranded == 'ISR') else '', 53 | linksf = lambda wildcards, output: str(os.path.abspath(output.ctsdir)) 54 | shell: "set +euo pipefail; {params.mapp} quant -t {threads} -i {input.uidx} {params.stranded} {params.cpara} -o {output.ctsdir} --single {input.r1} &>> {log} && gzip {output.ctsdir}/abundance.tsv && ln -fs {params.linksf}/abundance.tsv.gz {output.cnts} &>> {log}" 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from collections import defaultdict 5 | from glob import glob 6 | 7 |
from setuptools import find_packages, setup 8 | 9 | import versioneer 10 | 11 | NAME = "MONSDA" 12 | DESCRIPTION = ( 13 | "MONSDA, Modular Organizer of Nextflow and Snakemake driven HTS Data Analysis" 14 | ) 15 | # Set __version__ done by versioneer 16 | # exec(open("MONSDA/__init__.py").read()) 17 | 18 | 19 | def generate_datafiles(): 20 | df = list() 21 | dirlist = defaultdict(list) 22 | 23 | libs = list() 24 | for l in glob("MONSDA/lib/*"): 25 | if any(x in l for x in [".pl", ".pm", ".py", ".sh", ".R", ".groovy"]): 26 | libs.append(os.path.relpath(l)) 27 | for l in libs: 28 | dirlist[str(os.path.join("share", os.path.dirname(l)))].append(l) 29 | 30 | scripts = list() 31 | for s in glob("scripts/**", recursive=True): 32 | if any(x in s for x in [".pl", ".pm", ".py", ".sh", ".R"]): 33 | scripts.append(os.path.relpath(s)) # os.path.join(s, os.path.split(s)[1])) 34 | for s in scripts: 35 | dirlist[str(os.path.join("share", "MONSDA", os.path.dirname(s)))].append(s) 36 | 37 | workflows = list() 38 | for d in glob("workflows/*"): 39 | if "wip" not in d: 40 | workflows.append( 41 | os.path.relpath(d) 42 | ) # os.path.join(d, os.path.split(d)[1])) 43 | for w in workflows: 44 | dirlist[os.path.join("share", "MONSDA", os.path.dirname(w))].append(w) 45 | 46 | envs = list() 47 | for e in glob("envs/*"): 48 | envs.append(os.path.relpath(e)) # os.path.join(d, os.path.split(d)[1])) 49 | for e in envs: 50 | dirlist[os.path.join("share", "MONSDA", os.path.dirname(e))].append(e) 51 | 52 | confs = list() 53 | for c in glob("configs/*"): 54 | if any(x in c for x in [".json"]): 55 | confs.append(os.path.relpath(c)) # os.path.join(d, os.path.split(d)[1])) 56 | for c in confs: 57 | dirlist[os.path.join("share", "MONSDA", os.path.dirname(c))].append(c) 58 | 59 | profiles = list() 60 | for p in glob("profile_*/**"): 61 | profiles.append(os.path.relpath(p)) # os.path.join(d, os.path.split(d)[1])) 62 | for p in profiles: 63 | dirlist[os.path.join("share", "MONSDA", os.path.dirname(p))].append(p) 64 | 65 | dirlist[""].append("LICENSE") 66 | 67 | for k, v in dirlist.items(): 68 | df.append((k, v)) 69 | 70 | return df 71 | 72 | 73 | # requires = open(os.path.abspath("requirements.txt")).read().strip().split("\n") 74 | 75 | setup( 76 | name=NAME, 77 | version=versioneer.get_version(), 78 | cmdclass=versioneer.get_cmdclass(), 79 | description=DESCRIPTION, 80 | author="Joerg Fallmann", 81 | author_email="fall@bioinf.uni-leipzig.de", 82 | packages=find_packages(include=["MONSDA", "MONSDA.*"]), 83 | include_package_data=True, 84 | data_files=generate_datafiles(), 85 | entry_points={ 86 | "console_scripts": [ 87 | "monsda = MONSDA.RunMONSDA:main", 88 | "monsda_configure = MONSDA.Configurator:main", 89 | ] 90 | }, 91 | # install_requires=requires, 92 | install_requires=[ 93 | "biopython>=1.83", 94 | "snakemake>=8.16.0", 95 | "black>=21.5b2", 96 | "flake8>=3.8.3", 97 | "isort>=5.13.2", 98 | "sphinx>=4.1.0", 99 | ], 100 | python_requires=">=3.12.0", 101 | setup_requires=["pytest-runner"], 102 | tests_require=["pytest"], 103 | zip_safe=False, 104 | license="LICENSE", 105 | url="https://github.com/jfallmann/MONSDA", 106 | long_description_content_type="text/markdown", 107 | long_description=open("README.md").read(), 108 | ) 109 | -------------------------------------------------------------------------------- /scripts/lib/Logger.py: -------------------------------------------------------------------------------- 1 | # logger.py --- 2 | # 3 | # Filename: logger.py 4 | # Description: 5 | # Author: Joerg Fallmann 6 | # Maintainer: 
7 | # Created: Mon Aug 12 10:26:55 2019 (+0200) 8 | # Version: 9 | # Package-Requires: () 10 | # Last-Updated: Tue Sep 24 16:53:41 2019 (+0200) 11 | # By: Joerg Fallmann 12 | # Update #: 64 13 | # URL: 14 | # Doc URL: 15 | # Keywords: 16 | # Compatibility: 17 | # 18 | # 19 | 20 | # Commentary: 21 | # 22 | # 23 | # 24 | # 25 | 26 | # Change Log: 27 | # 28 | # 29 | # 30 | # 31 | # This program is free software: you can redistribute it and/or modify 32 | # it under the terms of the GNU General Public License as published by 33 | # the Free Software Foundation, either version 3 of the License, or (at 34 | # your option) any later version. 35 | # 36 | # This program is distributed in the hope that it will be useful, but 37 | # WITHOUT ANY WARRANTY; without even the implied warranty of 38 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 39 | # General Public License for more details. 40 | # 41 | # You should have received a copy of the GNU General Public License 42 | # along with GNU Emacs. If not, see . 43 | # 44 | # 45 | 46 | # Code: 47 | import logging 48 | import multiprocessing 49 | import os 50 | import sys 51 | import inspect 52 | import traceback as tb 53 | 54 | 55 | def makelogdir(logdir): 56 | if not os.path.isabs(logdir): 57 | logdir = os.path.abspath(logdir) 58 | if not os.path.exists(logdir): 59 | os.makedirs(logdir) 60 | return logdir 61 | 62 | 63 | def setup_logger( 64 | name, log_file, filemode="w", logformat=None, datefmt=None, level="WARNING" 65 | ): 66 | """Function setup as many loggers as you want""" 67 | 68 | logger = logging.getLogger(name) 69 | if log_file != "stdout" and log_file != "stderr": 70 | makelogdir(os.path.dirname(log_file)) 71 | handler = logging.FileHandler(log_file, mode=filemode) 72 | else: 73 | handler = logging.StreamHandler() 74 | 75 | handler.setFormatter(logging.Formatter(fmt=logformat, datefmt=datefmt)) 76 | 77 | logger.setLevel(level) 78 | logger.addHandler(handler) 79 | 80 | return logger 81 | 82 | 83 | if __name__ == "__main__": 84 | try: 85 | # set up logging to file 86 | logging = setup_logger( 87 | name="", 88 | log_file="stderr", 89 | logformat="%(asctime)s %(name)-12s %(levelname)-8s %(message)s", 90 | datefmt="%m-%d %H:%M", 91 | level="WARNING", 92 | ) 93 | 94 | # define a Handler which writes INFO messages or higher to the sys.stderr 95 | # console = logging.StreamHandler() 96 | # console.setLevel(logging.INFO) 97 | # set a format which is simpler for console use 98 | # formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') 99 | # tell the handler to use this format 100 | # console.setFormatter(formatter) 101 | # add the handler to the root logger 102 | # logging.getLogger('').addHandler(console) 103 | 104 | # Now, we can log to the root logger, or any other logger. First the root... 
105 | # logging.info('Imported logger.py') 106 | # Now, use this in code defining a couple of other loggers which might represent areas in your 107 | # application, e.g.: 108 | # log = logging.getLogger('logger.main') 109 | 110 | except Exception as err: 111 | exc_type, exc_value, exc_tb = sys.exc_info() 112 | tbe = tb.TracebackException( 113 | exc_type, 114 | exc_value, 115 | exc_tb, 116 | ) 117 | logging.error("".join(tbe.format())) 118 | 119 | 120 | # log.py ends here 121 | -------------------------------------------------------------------------------- /workflows/segemehl3_bisulfite.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: fa = REFERENCE 10 | output: idx1 = INDEX, 11 | idx2 = INDEX2, 12 | uidx1 = expand("{refd}/INDICES/{mape}_{unikey}.idx", refd=REFDIR, mape=MAPPERENV, unikey=unik), 13 | uidx2 = expand("{refd}/INDICES/{mape}_{unikey}.idx2", refd=REFDIR, mape=MAPPERENV, unikey=unik+'_bs') 14 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 15 | conda: ""+MAPPERENV.replace('bisulfite', '')+".yaml" 16 | threads: MAXTHREAD 17 | params: indexer = MAPPERBIN, 18 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 19 | linkidx1 = lambda wildcards, output: str(os.path.abspath(output.uidx1[0])), 20 | linkidx2 = lambda wildcards, output: str(os.path.abspath(output.uidx2[0])) 21 | shell: "{params.indexer} --threads {threads} {params.ipara} -d {input.fa} -x {output.uidx1} -y {output.uidx2} &> {log} && ln -fs {params.linkidx1} {output.idx1} && ln -fs {params.linkidx2} {output.idx2}" 22 | 23 | if paired == 'paired': 24 | rule mapping: 25 | input: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 26 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 27 | uidx1 = rules.generate_index.output.uidx1[0], 28 | uidx2 = rules.generate_index.output.uidx2[0], 29 | fa = REFERENCE 30 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 31 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 32 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 33 | log: "LOGS/{combo}/{file}/mapping.log" 34 | conda: ""+MAPPERENV.replace('bisulfite', '')+".yaml" 35 | threads: MAXTHREAD 36 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 37 | mapp=MAPPERBIN 38 | shell: "set +o pipefail;{params.mapp} {params.mpara} -d {input.fa} -i {input.uidx1} -j {input.uidx2} -q {input.r1} -p {input.r2} --threads {threads} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} - ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 39 | else: 40 | rule mapping: 41 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 42 | uidx1 = rules.generate_index.output.uidx1[0], 43 | uidx2= rules.generate_index.output.uidx2[0], 44 | fa = REFERENCE 45 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 46 | unmapped = 
"UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 47 | log: "LOGS/{combo}/{file}/mapping.log" 48 | conda: ""+MAPPERENV.replace('bisulfite', '')+".yaml" 49 | threads: MAXTHREAD 50 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 51 | mapp=MAPPERBIN 52 | shell: "set +o pipefail; {params.mapp} {params.mpara} -d {input.fa} -i {input.uidx1} -j {input.uidx2} -q {input.query} --threads {threads} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n --verbosity 0 - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" 53 | -------------------------------------------------------------------------------- /envs/dexseq_DTU.yaml: -------------------------------------------------------------------------------- 1 | name: dexseq 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bioconductor-annotate =1.76.0 9 | - bioconductor-annotationdbi =1.60.0 10 | - bioconductor-biobase =2.58.0 11 | - bioconductor-biocfilecache =2.6.0 12 | - bioconductor-biocgenerics =0.44.0 13 | - bioconductor-biocio =1.8.0 14 | - bioconductor-biocparallel =1.32.5 15 | - bioconductor-biomart =2.54.0 16 | - bioconductor-biostrings =2.66.0 17 | - bioconductor-data-packages =20230202 18 | - bioconductor-delayedarray =0.24.0 19 | - bioconductor-deseq2 =1.38.0 20 | - bioconductor-dexseq =1.44.0 21 | - bioconductor-drimseq =1.26.0 22 | - bioconductor-edger =3.40.0 23 | - bioconductor-genefilter =1.80.0 24 | - bioconductor-geneplotter =1.76.0 25 | - bioconductor-genomeinfodb =1.34.8 26 | - bioconductor-genomeinfodbdata =1.2.9 27 | - bioconductor-genomicalignments =1.34.0 28 | - bioconductor-genomicfeatures =1.50.2 29 | - bioconductor-genomicranges =1.50.0 30 | - bioconductor-iranges =2.32.0 31 | - bioconductor-keggrest =1.38.0 32 | - bioconductor-limma =3.54.0 33 | - bioconductor-matrixgenerics =1.10.0 34 | - bioconductor-rhtslib =2.0.0 35 | - bioconductor-rsamtools =2.14.0 36 | - bioconductor-rtracklayer =1.58.0 37 | - bioconductor-s4vectors =0.36.0 38 | - bioconductor-summarizedexperiment =1.28.0 39 | - bioconductor-tximport =1.26.0 40 | - bioconductor-xvector =0.38.0 41 | - bioconductor-zlibbioc =1.44.0 42 | - python =3.10.9 43 | - python_abi =3.10 44 | - pyyaml =6.0 45 | - r-askpass =1.1 46 | - r-assertthat =0.2.1 47 | - r-base =4.2.0 48 | - r-bh =1.81.0_1 49 | - r-bit =4.0.5 50 | - r-bit64 =4.0.5 51 | - r-bitops =1.0_7 52 | - r-blob =1.2.3 53 | - r-cachem =1.0.6 54 | - r-cli =3.6.0 55 | - r-codetools =0.2_19 56 | - r-colorspace =2.1_0 57 | - r-cpp11 =0.4.3 58 | - r-crayon =1.5.2 59 | - r-curl =4.3.3 60 | - r-dbi =1.1.3 61 | - r-dbplyr =2.3.0 62 | - r-digest =0.6.31 63 | - r-dplyr =1.1.0 64 | - r-ellipsis =0.3.2 65 | - r-fansi =1.0.4 66 | - r-farver =2.1.1 67 | - r-fastmap =1.1.0 68 | - r-filelock =1.0.2 69 | - r-formatr =1.14 70 | - r-futile.logger =1.4.3 71 | - r-futile.options =1.0.1 72 | - r-generics =0.1.3 73 | - r-ggplot2 =3.4.1 74 | - r-glue =1.6.2 75 | - r-gtable =0.3.1 76 | - r-hms =1.1.2 77 | - r-httr =1.4.4 78 | - r-hwriter =1.3.2.1 79 | - r-isoband =0.2.7 80 | - r-jsonlite =1.8.4 81 | - r-labeling =0.4.2 82 | - r-lambda.r =1.2.4 83 | - r-lattice =0.20_45 84 | - r-lifecycle =1.0.3 85 | - r-locfit =1.5_9.7 86 | - r-magrittr =2.0.3 87 | - r-mass =7.3_58.2 88 | - r-matrix =1.5_3 89 | - r-matrixstats =0.63.0 90 | - r-memoise =2.0.1 91 | - r-mgcv =1.8_41 92 | - r-mime =0.12 93 | - r-munsell =0.5.0 94 | - r-nlme =3.1_157 95 | - r-openssl 
=2.0.5 96 | - r-pillar =1.8.1 97 | - r-pkgconfig =2.0.3 98 | - r-plogr =0.2.0 99 | - r-plyr =1.8.8 100 | - r-png =0.1_8 101 | - r-prettyunits =1.1.1 102 | - r-progress =1.2.2 103 | - r-purrr =1.0.1 104 | - r-r6 =2.5.1 105 | - r-rappdirs =0.3.3 106 | - r-rcolorbrewer =1.1_3 107 | - r-rcpp =1.0.10 108 | - r-rcpparmadillo =0.11.4.4.0 109 | - r-rcurl =1.98_1.10 110 | - r-reshape2 =1.4.4 111 | - r-restfulr =0.0.15 112 | - r-rjson =0.2.21 113 | - r-rlang =1.0.6 114 | - r-rsqlite =2.2.20 115 | - r-scales =1.2.1 116 | - r-snow =0.4_4 117 | - r-statmod =1.4.36 118 | - r-stringi =1.7.6 119 | - r-stringr =1.5.0 120 | - r-survival =3.5_3 121 | - r-sys =3.4.1 122 | - r-tibble =3.1.8 123 | - r-tidyselect =1.2.0 124 | - r-utf8 =1.2.3 125 | - r-vctrs =0.5.2 126 | - r-viridislite =0.4.1 127 | - r-withr =2.5.0 128 | - r-xml =3.99_0.9 129 | - r-xml2 =1.3.3 130 | - r-xtable =1.8_4 131 | - r-yaml =2.3.7 132 | - readline =8.2 133 | - sed =4.8 -------------------------------------------------------------------------------- /envs/isoformswitchanalyzer.yaml: -------------------------------------------------------------------------------- 1 | name: isoformswitchanalyzer 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bioconductor-annotate =1.66.0 9 | - bioconductor-annotationdbi =1.50.0 10 | - bioconductor-biobase =2.48.0 11 | - bioconductor-biocfilecache =1.12.0 12 | - bioconductor-biocgenerics =0.34.0 13 | - bioconductor-biocparallel =1.22.0 14 | - bioconductor-biomart =2.44.0 15 | - bioconductor-biostrings =2.56.0 16 | - bioconductor-bsgenome =1.56.0 17 | - bioconductor-delayedarray =0.14.0 18 | - bioconductor-deseq2 =1.28.0 19 | - bioconductor-dexseq =1.34.0 20 | - bioconductor-drimseq =1.16.0 21 | - bioconductor-edger =3.30.0 22 | - bioconductor-genefilter =1.70.0 23 | - bioconductor-geneplotter =1.66.0 24 | - bioconductor-genomeinfodb =1.24.0 25 | - bioconductor-genomeinfodbdata =1.2.3 26 | - bioconductor-genomicalignments =1.24.0 27 | - bioconductor-genomicranges =1.40.0 28 | - bioconductor-iranges =2.22.1 29 | - bioconductor-isoformswitchanalyzer =1.10.0 30 | - bioconductor-limma =3.44.1 31 | - bioconductor-rhtslib =1.20.0 32 | - bioconductor-rsamtools =2.4.0 33 | - bioconductor-rtracklayer =1.48.0 34 | - bioconductor-s4vectors =0.26.0 35 | - bioconductor-summarizedexperiment =1.18.1 36 | - bioconductor-tximport =1.16.0 37 | - bioconductor-xvector =0.28.0 38 | - bioconductor-zlibbioc =1.34.0 39 | - r-askpass =1.1 40 | - r-assertthat =0.2.1 41 | - r-backports =1.1.8 42 | - r-base =4.0.2 43 | - r-bh =1.72.0_3 44 | - r-bit =4.0.4 45 | - r-bit64 =4.0.2 46 | - r-bitops =1.0_6 47 | - r-blob =1.2.1 48 | - r-callr =3.4.3 49 | - r-cli =2.0.2 50 | - r-clipr =0.7.0 51 | - r-colorspace =1.4_1 52 | - r-crayon =1.3.4 53 | - r-curl =4.3 54 | - r-dbi =1.1.0 55 | - r-dbplyr =1.4.4 56 | - r-desc =1.2.0 57 | - r-digest =0.6.25 58 | - r-dplyr =1.0.2 59 | - r-ellipsis =0.3.1 60 | - r-evaluate =0.14 61 | - r-fansi =0.4.1 62 | - r-farver =2.0.3 63 | - r-formatr =1.7 64 | - r-futile.logger =1.4.3 65 | - r-futile.options =1.0.1 66 | - r-generics =0.0.2 67 | - r-ggplot2 =3.3.2 68 | - r-glue =1.4.1 69 | - r-gridextra =2.3 70 | - r-gtable =0.3.0 71 | - r-hms =0.5.3 72 | - r-httr =1.4.2 73 | - r-hwriter =1.3.2 74 | - r-isoband =0.2.2 75 | - r-jsonlite =1.7.0 76 | - r-labeling =0.3 77 | - r-lambda.r =1.2.4 78 | - r-lattice =0.20_41 79 | - r-lifecycle =0.2.0 80 | - r-locfit =1.5_9.4 81 | - r-magrittr =1.5 82 | - r-mass =7.3_52 83 | - r-matrix =1.2_18 84 | - r-matrixstats =0.56.0 85 | - 
r-memoise =1.1.0 86 | - r-mgcv =1.8_32 87 | - r-mime =0.9 88 | - r-munsell =0.5.0 89 | - r-nlme =3.1_149 90 | - r-openssl =1.4.2 91 | - r-pillar =1.4.6 92 | - r-pkgbuild =1.1.0 93 | - r-pkgconfig =2.0.3 94 | - r-pkgload =1.1.0 95 | - r-plogr =0.2.0 96 | - r-plyr =1.8.6 97 | - r-praise =1.0.0 98 | - r-prettyunits =1.1.1 99 | - r-processx =3.4.3 100 | - r-progress =1.2.2 101 | - r-ps =1.3.4 102 | - r-purrr =0.3.4 103 | - r-r6 =2.4.1 104 | - r-rappdirs =0.3.1 105 | - r-rcolorbrewer =1.1_2 106 | - r-rcpp =1.0.4.6 107 | - r-rcpparmadillo =0.9.900.2.0 108 | - r-rcurl =1.98_1.2 109 | - r-readr =1.3.1 110 | - r-reshape2 =1.4.4 111 | - r-rlang =0.4.7 112 | - r-rprojroot =1.3_2 113 | - r-rsqlite =2.2.0 114 | - r-rstudioapi =0.11 115 | - r-scales =1.1.1 116 | - r-snow =0.4_3 117 | - r-statmod =1.4.34 118 | - r-stringi =1.4.6 119 | - r-stringr =1.4.0 120 | - r-survival =3.2_3 121 | - r-sys =3.4 122 | - r-testthat =2.3.2 123 | - r-tibble =3.0.3 124 | - r-tidyselect =1.1.0 125 | - r-utf8 =1.1.4 126 | - r-vctrs =0.3.2 127 | - r-venndiagram =1.6.20 128 | - r-viridislite =0.3.0 129 | - r-withr =2.2.0 130 | - r-xml =3.99_0.3 131 | - r-xtable =1.8_4 132 | - r-zeallot =0.1.0 133 | - readline =8.2 134 | - sed =4.8 -------------------------------------------------------------------------------- /workflows/fastqc.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | //QC RAW 6 | process qc_raw{ 7 | conda "$QCENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 15 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 16 | else null 17 | } 18 | 19 | input: 20 | path read 21 | 22 | output: 23 | path "*.{zip,html}", emit: fastqc_results 24 | 25 | script: 26 | """ 27 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 28 | """ 29 | } 30 | 31 | workflow QC_RAW{ 32 | take: 33 | collection 34 | 35 | main: 36 | 37 | qc_raw(samples_ch) 38 | 39 | emit: 40 | qc = qc_raw.out.fastqc_results 41 | } 42 | 43 | //QC TRIM 44 | 45 | process qc_trimmed{ 46 | conda "$QCENV"+".yaml" 47 | cpus THREADS 48 | cache 'lenient' 49 | //validExitStatus 0,1 50 | 51 | publishDir "${workflow.workDir}/../" , mode: 'link', 52 | saveAs: {filename -> 53 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 54 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 55 | else null 56 | } 57 | 58 | input: 59 | path read 60 | 61 | output: 62 | path "*.{zip,html}", emit: fastqc_results 63 | 64 | script: 65 | """ 66 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 67 | """ 68 | } 69 | 70 | workflow QC_TRIMMING{ 71 | take: collection 72 | 73 | main: 74 | 75 | qc_trimmed(collection) 76 | 77 | emit: 78 | qc = qc_trimmed.out.fastqc_results 79 | } 80 | 81 | 82 | //QC MAP 83 | 84 | process qc_mapped{ 85 | conda "$QCENV"+".yaml" 86 | cpus THREADS 87 | cache 'lenient' 88 | //validExitStatus 0,1 89 | 90 | publishDir "${workflow.workDir}/../" , mode: 'link', 91 | saveAs: {filename -> 92 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 93 | else if 
(filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 94 | else null 95 | } 96 | 97 | input: 98 | path map 99 | 100 | output: 101 | path "*.{zip,html}", emit: fastqc_results 102 | 103 | script: 104 | """ 105 | fastqc --quiet -t ${task.cpus} $QCPARAMS -f bam $map 106 | """ 107 | } 108 | 109 | workflow QC_MAPPING{ 110 | take: collection 111 | main: 112 | 113 | qc_mapped(collection) 114 | 115 | emit: 116 | qc = qc_mapped.out.fastqc_results 117 | } 118 | 119 | // DEDUP QC 120 | 121 | process qc_dedup{ 122 | conda "$QCENV"+".yaml" 123 | cpus THREADS 124 | cache 'lenient' 125 | //validExitStatus 0,1 126 | 127 | publishDir "${workflow.workDir}/../" , mode: 'link', 128 | saveAs: {filename -> 129 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 130 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 131 | else null 132 | } 133 | 134 | input: 135 | path read 136 | 137 | output: 138 | path "*.{zip,html}", emit: fastqc_results 139 | 140 | script: 141 | """ 142 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 143 | """ 144 | } 145 | 146 | workflow QC_DEDUP{ 147 | take: collection 148 | 149 | main: 150 | 151 | qc_dedup(collection) 152 | 153 | emit: 154 | qc = qc_dedup.out.fastqc_results 155 | } 156 | --------------------------------------------------------------------------------