├── .coveragerc
├── envs
    ├── qc.yaml
    ├── bbduk.yaml
    ├── salmon.yaml
    ├── kallisto.yaml
    ├── fastqc.yaml
    ├── zip.yaml
    ├── star.yaml
    ├── trimgalore.yaml
    ├── bbmap.yaml
    ├── fastp.yaml
    ├── picard.yaml
    ├── ciri2.yaml
    ├── cutadapt.yaml
    ├── dorado.yaml
    ├── bwa.yaml
    ├── bwa2.yaml
    ├── samtools.yaml
    ├── minimap.yaml
    ├── piranha.yaml
    ├── index.yaml
    ├── macs.yaml
    ├── segemehl3.yaml
    ├── umitools.yaml
    ├── segemehl.yaml
    ├── bwameth.yaml
    ├── sra.yaml
    ├── trimmomatic.yaml
    ├── trnascan.yaml
    ├── hisat2.yaml
    ├── bedtools.yaml
    ├── scyphy.yaml
    ├── ucsc.yaml
    ├── trimm.yaml
    ├── base.yaml
    ├── guppy.yaml
    ├── diego_DAS.yaml
    ├── countreads.yaml
    ├── countreads_de.yaml
    ├── summary.yaml
    ├── monsda.yaml
    ├── edger_DAS.yaml
    ├── edger_DEU.yaml
    ├── perl.yaml
    ├── drimseq_DTU.yaml
    ├── dexseq_DTU.yaml
    └── isoformswitchanalyzer.yaml
├── scripts
    ├── lib
    │   ├── __init.py__
    │   ├── _lib.R
    │   └── Logger.py
    ├── Preprocessing
    │   └── indexfa.sh
    ├── Shells
    │   ├── printFQEnds.sh
    │   ├── Sam2Bed.sh
    │   ├── Sam2Bam.sh
    │   ├── printEnds.sh
    │   ├── NonUniqueBam_woPicard.sh
    │   ├── UniqueBam_woPicard.sh
    │   ├── MergeGeneExpression_Cufflinks.sh
    │   ├── MergeExpression_RNAcounter.sh
    │   ├── UniqueSam_woPicard.sh
    │   └── MergeExpression_Cufflinks.sh
    ├── Analysis
    │   ├── GettRNAExpression.sh
    │   ├── SUMMARY
    │   │   └── header_summary.Rmd
    │   ├── CountFastqEnds.pl
    │   ├── DAS
    │   │   └── FeatureCounts2DIEGO.pl
    │   ├── PreprocessPeaks.pl
    │   ├── GOA.R
    │   └── AddStructure.py
    └── Universal
    │   ├── sam2fastq.pl
    │   └── countCCA.pl
├── workflows
    ├── unlock.smk
    ├── footer.smk
    ├── footer.nf
    ├── collect.nf
    ├── summary.smk
    ├── fastqc_raw.nf
    ├── multiqc.nf
    ├── picard_dedup.smk
    ├── dorado.smk
    ├── summary.nf
    ├── premultiqc.nf
    ├── simulatetrim.smk
    ├── dorado.nf
    ├── premultiqc.smk
    ├── guppy.smk
    ├── header.nf
    ├── ciri2.smk
    ├── umitools_dedup.nf
    ├── simulatetrim.nf
    ├── guppy.nf
    ├── picard_dedup.nf
    ├── wip
    │   └── pycoqc.smk
    ├── trimgalore.nf
    ├── ciri2.nf
    ├── sra.nf
    ├── manipulate_genome.smk
    ├── fastqc_dedup.nf
    ├── fastqc_trim.nf
    ├── cutadapt.nf
    ├── bbduk.nf
    ├── fastp.nf
    ├── manipulate_genome.nf
    ├── bbduk.smk
    ├── mapping.smk
    ├── fastp.smk
    ├── fastqc_raw.smk
    ├── cutadapt.smk
    ├── minimap.smk
    ├── segemehl.smk
    ├── bwameth.smk
    ├── fastqc_dedup_trim.nf
    ├── trimgalore.smk
    ├── bwa2.smk
    ├── salmon.smk
    ├── kallisto.smk
    ├── segemehl3_bisulfite.smk
    └── fastqc.nf
├── .gitattributes
├── MONSDA
    ├── __main__.py
    ├── __init__.py
    └── lib
    │   └── Collection.groovy
├── MANIFEST.in
├── requirements.txt
├── docs
    ├── source
    │   ├── _static
    │   │   └── css
    │   │   │   └── custom.css
    │   ├── integrate.rst
    │   ├── contribute.rst
    │   ├── cluster.rst
    │   ├── installation.rst
    │   ├── runsmk.rst
    │   ├── wrapper.rst
    │   ├── first.rst
    │   └── conditiontree.rst
    ├── requirements.txt
    ├── Makefile
    ├── make.bat
    ├── index.rst
    └── conf.py
├── profile_snakemake
    ├── slurm-jobscript.sh
    ├── config.yaml
    ├── cluster_config.yaml
    ├── slurm-submit.py
    └── slurm-status.py
├── tests
    ├── TestCondaEnvs.sh
    ├── test_nextflow.sh
    ├── test_snakemake.sh
    ├── manual_test.sh
    └── test_Utils.py
├── profile_nextflow
    └── nextflow.config
├── .readthedocs.yaml
├── .vscode
    └── settings.json
├── setup.cfg
├── configs
    └── tutorial_quick.json
├── pyproject.toml
├── .gitignore
└── setup.py

/.coveragerc:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/envs/qc.yaml:
--------------------------------------------------------------------------------
1 | fastqc.yaml
--------------------------------------------------------------------------------
/scripts/lib/__init.py__: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /envs/bbduk.yaml: -------------------------------------------------------------------------------- 1 | bbmap.yaml -------------------------------------------------------------------------------- /workflows/unlock.smk: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | NextSnakes/_version.py export-subst 2 | -------------------------------------------------------------------------------- /MONSDA/__main__.py: -------------------------------------------------------------------------------- 1 | from MONSDA import main 2 | 3 | main() 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include MONSDA/_version.py 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | --index-url https://pypi.python.org/simple/ 2 | 3 | -e . 4 | 5 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .tight-table td{ 2 | white-space: normal !important; 3 | } -------------------------------------------------------------------------------- /MONSDA/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import _version 2 | 3 | __version__ = _version.get_versions()["version"] 4 | -------------------------------------------------------------------------------- /profile_snakemake/slurm-jobscript.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # properties = {properties} 3 | {exec_job} 4 | -------------------------------------------------------------------------------- /envs/salmon.yaml: -------------------------------------------------------------------------------- 1 | name: salmon 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - salmon =1.10.3 -------------------------------------------------------------------------------- /envs/kallisto.yaml: -------------------------------------------------------------------------------- 1 | name: kallisto 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - kallisto =0.51.0 -------------------------------------------------------------------------------- /workflows/footer.smk: -------------------------------------------------------------------------------- 1 | onsuccess: 2 | print("Workflow finished, no error") 3 | onerror: 4 | print("ERROR: "+str({log})) 5 | -------------------------------------------------------------------------------- /envs/fastqc.yaml: -------------------------------------------------------------------------------- 1 | name: qc 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | dependencies: 6 | - multiqc =1.21 7 | - fastqc =0.12.1 8 | -------------------------------------------------------------------------------- /workflows/footer.nf: -------------------------------------------------------------------------------- 1 | workflow.onComplete { 2 | script: 3 | """ 4 | echo 'Workflow finished, no error' 5 | """ 6 | } 7 | -------------------------------------------------------------------------------- /envs/zip.yaml: -------------------------------------------------------------------------------- 1 | name: zip 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - pigz =2.6 8 | - zlib =1.2.11 -------------------------------------------------------------------------------- /envs/star.yaml: -------------------------------------------------------------------------------- 1 | name: star 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - samtools =1.16.1 9 | - star =2.7.10b -------------------------------------------------------------------------------- /envs/trimgalore.yaml: -------------------------------------------------------------------------------- 1 | name: trimgalore 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - trim-galore =0.6.7 8 | - rename =1.601 -------------------------------------------------------------------------------- /envs/bbmap.yaml: -------------------------------------------------------------------------------- 1 | name: bbmap 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - bbmap =39.01 8 | - rename =1.601 9 | 10 | -------------------------------------------------------------------------------- /envs/fastp.yaml: -------------------------------------------------------------------------------- 1 | name: fastp 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - fastp =0.23.4 8 | - rename =1.601 9 | 10 | -------------------------------------------------------------------------------- /envs/picard.yaml: 
-------------------------------------------------------------------------------- 1 | name: picardtools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - picard =2.27.4 9 | - samtools =1.16.1 -------------------------------------------------------------------------------- /tests/TestCondaEnvs.sh: -------------------------------------------------------------------------------- 1 | for i in ~/MONSDA/envs/*.yaml;do rm -rf ~/anaconda3/envs/tempenv;echo "INSTALLING $i" && conda env create -n tempenv -f $i && echo "DONE, NEXT";done 2 | -------------------------------------------------------------------------------- /envs/ciri2.yaml: -------------------------------------------------------------------------------- 1 | name: ciri2 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - perl =5.32.0 9 | - samtools =1.16.1 10 | -------------------------------------------------------------------------------- /envs/cutadapt.yaml: -------------------------------------------------------------------------------- 1 | name: cutadapt 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - cutadapt =4.1 8 | - rename =1.601 9 | 10 | -------------------------------------------------------------------------------- /envs/dorado.yaml: -------------------------------------------------------------------------------- 1 | name: dorado 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - samtools =1.16.1 9 | - pigz =2.6 10 | -------------------------------------------------------------------------------- /envs/bwa.yaml: -------------------------------------------------------------------------------- 1 | name: bwa 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bwa =0.7.17 9 | - pigz =2.6 10 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/bwa2.yaml: -------------------------------------------------------------------------------- 1 | name: bwa 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bwa-mem2 =2.2.1 9 | - pigz =2.6 10 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/samtools.yaml: -------------------------------------------------------------------------------- 1 | name: samtools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - samtools =1.16.1 8 | - grep =2.14 9 | - pigz =2.6 10 | -------------------------------------------------------------------------------- /tests/test_nextflow.sh: -------------------------------------------------------------------------------- 1 | DEF="INFO" 2 | LVL="${1:-$DEF}" 3 | bash cleanup.sh && export NXF_EXECUTOR=slurm; MONSDA --nextflow -j 8 --configfile multitool.json --directory ${PWD} --loglevel $LVL 4 | -------------------------------------------------------------------------------- /envs/minimap.yaml: -------------------------------------------------------------------------------- 1 | name: minimap 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - minimap2 =2.24 9 | - pigz =2.6 10 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/piranha.yaml: -------------------------------------------------------------------------------- 1 | name: piranha 2 | channels: 3 | - conda-forge 4 | - bioconda 
5 | - defaults 6 | - r 7 | dependencies: 8 | - bamtools =2.5.1 9 | - piranha =1.2.1 10 | - readline =8.2 -------------------------------------------------------------------------------- /envs/index.yaml: -------------------------------------------------------------------------------- 1 | name: index 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - samtools =1.16.1 8 | - curl =7.87.0 9 | - segemehl =0.2.0 10 | - pigz =2.6 -------------------------------------------------------------------------------- /tests/test_snakemake.sh: -------------------------------------------------------------------------------- 1 | bash cleanup.sh && MONSDA -j 8 --configfile multitool.json --directory ${PWD} --conda-frontend mamba --profile slurm --conda-frontend mamba --conda-prefix /scratch/snakemake_conda_envs 2 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinxcontrib-napoleon 3 | sphinx-argparse 4 | sphinx_rtd_theme 5 | pytest-sphinx 6 | docutils 7 | autodoc 8 | mathjax 9 | recommonmark 10 | configargparse 11 | appdirs 12 | -------------------------------------------------------------------------------- /envs/macs.yaml: -------------------------------------------------------------------------------- 1 | name: macs 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - macs2 =2.2.7.1 9 | - readline =8.2 10 | - samtools =1.16.1 11 | - sed =4.8 -------------------------------------------------------------------------------- /envs/segemehl3.yaml: -------------------------------------------------------------------------------- 1 | name: segemehl3 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - r 6 | dependencies: 7 | - grep =2.14 8 | - samtools =1.16.1 9 | - segemehl =0.3.4 10 | - pigz =2.6 11 | -------------------------------------------------------------------------------- /envs/umitools.yaml: -------------------------------------------------------------------------------- 1 | name: umitools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - samtools =1.16.1 9 | - umi_tools =1.1.2 10 | - dateutils =0.6.12 -------------------------------------------------------------------------------- /envs/segemehl.yaml: -------------------------------------------------------------------------------- 1 | name: segemehl 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - samtools =1.16.1 8 | - segemehl =0.2.0 9 | - grep =2.14 10 | - pigz =2.6 11 | -------------------------------------------------------------------------------- /envs/bwameth.yaml: -------------------------------------------------------------------------------- 1 | name: bwameth 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bwameth =0.2.7 9 | - bwa-mem2 =2.2.1 10 | - pigz =2.6 11 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/sra.yaml: -------------------------------------------------------------------------------- 1 | name: sratools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - perl =5.32.1 9 | - pigz =2.6 10 | - rename =1.601 11 | - sra-tools =2.11.0 12 | -------------------------------------------------------------------------------- /envs/trimmomatic.yaml: 
-------------------------------------------------------------------------------- 1 | name: trimmomatic 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - trimmomatic =0.39 8 | - python =3.11.0 9 | - fastqc =0.11.8 10 | - rename =1.601 -------------------------------------------------------------------------------- /envs/trnascan.yaml: -------------------------------------------------------------------------------- 1 | name: trnascan 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - r 6 | - defaults 7 | dependencies: 8 | - infernal =1.1.2 9 | - trnascan-se =2.0 10 | - perl =5.26.2 11 | - pigz =2.6 12 | -------------------------------------------------------------------------------- /envs/hisat2.yaml: -------------------------------------------------------------------------------- 1 | name: hisat2 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - hisat2 =2.2.1 9 | - samtools =1.16.1 10 | - pigz =2.6 11 | - pysam =0.19.1 12 | - python =3.9 13 | - rename =1.601 14 | -------------------------------------------------------------------------------- /envs/bedtools.yaml: -------------------------------------------------------------------------------- 1 | name: bedtools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bedtools =2.30.0 9 | - curl =7.87.0 10 | - grep =3.4 11 | - pigz =2.6 12 | - pysam =0.20.0 13 | - python =3.10.9 14 | - samtools =1.16.1 -------------------------------------------------------------------------------- /envs/scyphy.yaml: -------------------------------------------------------------------------------- 1 | name: scyphy 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bzip2 =1.0.8 9 | - piranha =1.2.1 10 | - pyfaidx =0.7.1 11 | - pysam =0.19.1 12 | - python =3.9 13 | - readline =8.2 14 | - samtools =1.16.1 15 | -------------------------------------------------------------------------------- /envs/ucsc.yaml: -------------------------------------------------------------------------------- 1 | name: ucsc 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - grep =2.14 9 | - ucsc-bedgraphtobigwig =377 10 | - ucsc-beditemoverlapcount =377 11 | - ucsc-fatotwobit =377 12 | - ucsc-twobitinfo =377 13 | - pigz =2.6 14 | 15 | -------------------------------------------------------------------------------- /profile_nextflow/nextflow.config: -------------------------------------------------------------------------------- 1 | profiles { 2 | slurm { 3 | process.executor = 'slurm' 4 | process.memory = '10 GB' 5 | process.queue = 'main' 6 | withName: '_idx|_map' { 7 | memory = '160GB' 8 | } 9 | } 10 | 11 | local { 12 | process.executor = 'local' 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /envs/trimm.yaml: -------------------------------------------------------------------------------- 1 | name: trimm 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - bbmap =38.22 8 | - cutadapt =1.18 9 | - fastqc =0.11.8 10 | - trim-galore =0.5.0 11 | - perl =5.26.2 12 | - pigz =2.6 13 | - pip =18.1 14 | - python =3.9 15 | - rename =1.601 16 | -------------------------------------------------------------------------------- /envs/base.yaml: -------------------------------------------------------------------------------- 1 | name: base 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | 
dependencies: 7 | - grep >=3.4 8 | - samtools =1.16.1 9 | - natsort >=8.1.0 10 | - perl =5.32.1 11 | - picard =2.27.4 12 | - pigz =2.6 13 | - python =3.10.4 14 | - python_abi =3.10 15 | - pyfaidx =0.5.9 16 | - pysam =0.20.0 -------------------------------------------------------------------------------- /scripts/Preprocessing/indexfa.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | inf=$1 4 | 5 | if [[ -s $inf ]] 6 | then 7 | if [[ "$inf" == *.gz* ]] 8 | then 9 | filename="${inf%.*}" 10 | zcat $inf > $filename && samtools faidx $filename && rm -f $filename 11 | else 12 | samtools faidx $inf 13 | fi 14 | else 15 | touch $inf.fai 16 | fi 17 | -------------------------------------------------------------------------------- /profile_snakemake/config.yaml: -------------------------------------------------------------------------------- 1 | restart-times: 3 2 | jobscript: "slurm-jobscript.sh" 3 | cluster: "slurm-submit.py" 4 | #cluster-status: "slurm-status.py" 5 | max-jobs-per-second: 1 6 | max-status-checks-per-second: 3 7 | local-cores: 1 8 | latency-wait: 60 9 | #use-conda: True 10 | keep-going: True 11 | rerun-incomplete: True 12 | #printshellcmds: True 13 | -------------------------------------------------------------------------------- /envs/guppy.yaml: -------------------------------------------------------------------------------- 1 | name: guppy 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - gettext =0.19.8.1 9 | - libffi =3.2.1 10 | - libgcc-ng =9.3.0 11 | - libgomp =9.3.0 12 | - libiconv =1.16 13 | - libidn =7.45.0 14 | - libidn11 =1.34 15 | - libidn2 =2.3.0 16 | - libstdcxx-ng =9.3.0 17 | - libunistring =0.9.10 18 | 19 | -------------------------------------------------------------------------------- /profile_snakemake/cluster_config.yaml: -------------------------------------------------------------------------------- 1 | __default__: 2 | account: user # your account name 3 | partition: main # the partition to use 4 | time: 1500 # default time (minutes) 5 | nodes: 1 6 | output: "/SLURMLOG/{rule}.{wildcards}.out" 7 | error: "/SLURMLOG/{rule}.{wildcards}.err" 8 | #ntasks: 1 9 | #mem: 14GB # default memory 10 | 11 | generate_index: 12 | mem: 200GB 13 | 14 | mapping: 15 | mem: 200GB 16 | -------------------------------------------------------------------------------- /envs/diego_DAS.yaml: -------------------------------------------------------------------------------- 1 | name: diego 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bedtools =2.30.0 9 | - diego =0.1.2 10 | - graphviz =2.49.1 11 | - imagemagick =7.1.0_10 12 | - matplotlib-base =3.5.1 13 | - numpy =1.22.3 14 | - perl =5.32.1 15 | - python-dateutil =2.8.2 16 | - pysam =0.19.1 17 | - python =3.9 18 | - readline =8.2 19 | - samtools =1.16.1 20 | - scipy =1.8.0 -------------------------------------------------------------------------------- /scripts/Shells/printFQEnds.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | in=$1 4 | out=$2 5 | 6 | if [ ! 
-f $out ]; then 7 | echo -ne "SAMPLE\tCCA" > $out 8 | fi 9 | 10 | fn=${in#*/} 11 | 12 | echo -ne "\n$fn\t" >> $out 13 | 14 | for a in CCA;do 15 | zcat $in|perl -sae 'BEGIN{$c=0}{if($F[0] eq $e){$c+=$F[1]}}END{{print $c}}' -- -e=$a >> $out 16 | done 17 | 18 | #zcat $fn|perl -sae 'print $fn."\t".join("\t",@F)' -- -e=$fn >> $out 19 | #echo -ne "\n" >> $out 20 | -------------------------------------------------------------------------------- /envs/countreads.yaml: -------------------------------------------------------------------------------- 1 | name: countreads 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bcftools =1.8 9 | - gffutils =0.11.1 10 | - grep =2.14 11 | - htseq =2.0.2 12 | - pigz =2.6 13 | - pyfaidx =0.5.8 14 | - pyparsing =2.4.6 15 | - python =3.9 16 | - pysam =0.19.1 17 | - python-dateutil =2.8.1 18 | - readline =8.2 19 | - samtools =1.16.1 20 | - simplejson =3.17.0 21 | - subread =1.6.4 22 | -------------------------------------------------------------------------------- /envs/countreads_de.yaml: -------------------------------------------------------------------------------- 1 | name: countreads_de 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bcftools =1.8 9 | - gffutils =0.11.1 10 | - grep =2.14 11 | - htseq =2.0.2 12 | - pigz =2.6 13 | - pyfaidx =0.5.8 14 | - pyparsing =2.4.6 15 | - pysam =0.19.1 16 | - python =3.9 17 | - python-dateutil =2.8.1 18 | - readline =8.2 19 | - samtools =1.16.1 20 | - simplejson =3.17.0 21 | - subread =1.6.4 22 | -------------------------------------------------------------------------------- /scripts/Shells/Sam2Bed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | FILE=$1 3 | OUT=$(basename $FILE) 4 | 5 | if [[ "$FILE" == *.gz* ]] 6 | then 7 | zcat $FILE|grep "HWI-ST"|awk '{FS="\t";OFS="\t"}{if(and($2,16)){print $3,$4-1,$4+length($10),$1,$2,"-"} else {print $3,$4-1,$4+length($10),$1,$2,"+"}}' - > $OUT"_Unique.bed"; 8 | else 9 | cat $FILE|grep "HWI-ST"|awk '{FS="\t";OFS="\t"}{if(and($2,16)){print $3,$4-1,$4+length($10),$1,$2,"-"} else {print $3,$4-1,$4+length($10),$1,$2,"+"}}' - > $OUT"_Unique.bed"; 10 | fi 11 | 12 | -------------------------------------------------------------------------------- /scripts/Analysis/GettRNAExpression.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | in=$1 4 | out=$2 5 | 6 | if [ ! 
-f $out ]; then 7 | echo -e "SAMPLE\ttRNA\tCount" > $out 8 | fi 9 | 10 | fn=${in#*/} 11 | 12 | zgrep -w CCA $in | perl -sae 'BEGIN{$c={}}{if($F[1] eq "seq"){@trnas=split(",",$F[6]);foreach $rna (@trnas){$c->{$rna}+=$F[3]}}}END{foreach $rna (keys %{$c}){print "$e\t$rna\t$c->{$rna}\n"}}' -- -e=$fn >> $out 13 | 14 | #awk -v e="$a" 'BEGIN{FS="\t";OFS="";c=0}{c+=$3}END{if(e != "A"){print c,"\t"}else{print c}}' >> $out 15 | -------------------------------------------------------------------------------- /scripts/Analysis/SUMMARY/header_summary.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "MONSDA SUMMARY REPORT" 3 | date: "`r Sys.Date()`" 4 | author: "`r Sys.getenv('LOGNAME')`" 5 | output: 6 | rmdformats::readthedown: 7 | code_folding: show 8 | self_contained: false 9 | thumbnails: false 10 | lightbox: false 11 | pkgdown: 12 | as_is: true 13 | params: 14 | root: '' 15 | outdir: '' 16 | --- 17 | 18 | 19 | ```{r setup, include=FALSE} 20 | library(knitr) 21 | knitr::opts_chunk$set(echo = TRUE) 22 | knitr::opts_knit$set(root.dir = params$root) 23 | ``` 24 | -------------------------------------------------------------------------------- /tests/manual_test.sh: -------------------------------------------------------------------------------- 1 | VERSION=$1 2 | #tag 3 | git tag -f v$VERSION 4 | #build 5 | rm -rf .eggs build *.egg-info dist ; nocorrect python setup.py bdist_wheel sdist 6 | #goto test dir 7 | cd ~/Work/Test/Pipi 8 | #uninstall old and install local new 9 | pip uninstall -y MONSDA && pip install ~/MONSDA/dist/MONSDA-$VERSION\-py3-none-any.whl 10 | #run snakemake 11 | clear && MONSDA -j 4 --configfile multitool.json --directory ${PWD} --conda-frontend mamba 12 | #run nextflow 13 | clear && MONSDA --nextflow -j 4 -resume --configfile multitool.json --directory ${PWD} 14 | -------------------------------------------------------------------------------- /scripts/Shells/Sam2Bam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | file=$1 ### Name of the sam file you want to convert 4 | ref=$2 ### Path to reference genome fasta file 5 | bins=$3 6 | out=$4 7 | threads=$5 8 | 9 | echo "running samtools view -bT $ref -o $out --threads $threads $file" 10 | 11 | if [ ! -f $out ];then 12 | echo "$out not found, creating new" 13 | zcat $file|samtools view -bT $ref -o $out --threads $threads - 14 | fi 15 | if [ ! -f $out".bai" ];then 16 | echo "$out.bai not found, creating new" 17 | samtools index $out 18 | fi 19 | 20 | -------------------------------------------------------------------------------- /scripts/Shells/printEnds.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | in=$1 4 | out=$2 5 | 6 | if [ ! 
-f $out ]; then
7 | echo -ne "SAMPLE\tCCACCA\tCCACC\tCCAC\tCCA\tCC\tCA\tC\tA" > $out
8 | fi
9 | 
10 | fn=${in#*/}
11 | 
12 | echo -ne "\n$fn\t" >> $out
13 | #perl -se '{$o = (split("\/",$f))[-1];print "\n".$o."\t"}' -- -f=$in >> $out
14 | 
15 | for a in CCACCA CCACC CCAC CCA CC CA C A;do
16 | zgrep -w $a $in | perl -sae 'BEGIN{$c=0}{if($F[1] eq "seq"){$c+=$F[3]}}END{if($e ne "A"){print $c."\t"}else{print $c}}' -- -e=$a >> $out
17 | done
18 | 
19 | #awk -v e="$a" 'BEGIN{FS="\t";OFS="";c=0}{c+=$3}END{if(e != "A"){print c,"\t"}else{print c}}' >> $out
20 | 
--------------------------------------------------------------------------------
/workflows/collect.nf:
--------------------------------------------------------------------------------
1 | process collect_stuff{
2 |     cpus THREADS
3 |     cache 'lenient'
4 |     //validExitStatus 0,1
5 | 
6 |     publishDir "${workflow.workDir}/../" , mode: 'link',
7 |     saveAs: {filename ->
8 |         "LOGS/COLLECT/${COMBO}/${CONDITION}/${file(filename).getName()}"
9 |     }
10 |     input:
11 |     path check
12 | 
13 | 
14 |     output:
15 |     path "collect.txt", emit: done
16 | 
17 |     script:
18 |     """
19 |     echo "$check successful!" > collect.txt
20 |     """
21 | }
22 | 
23 | workflow COLLECT{
24 |     take:
25 |     whatever
26 | 
27 |     main:
28 | 
29 |     collect_stuff(whatever.collect())
30 | 
31 | 
32 | }
33 | 
--------------------------------------------------------------------------------
/scripts/Analysis/CountFastqEnds.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | use strict;
3 | use warnings;
4 | use PerlIO::gzip;
5 | 
6 | my $in = shift; #fastq
7 | 
8 | open (SEQ, "<:gzip(autopop)", $in) or die "$!";
9 | 
10 | my $tails = ();
11 | my $all = 0;
12 | my $i = 1;
13 | 
14 | while(<SEQ>){
15 | 
16 |     chomp(my $line = $_);
17 | 
18 |     if ($i == 2){
19 |         my $tail = substr $line,-3;
20 |         $tails->{$tail}++;
21 |         $i++;
22 |     }
23 |     elsif($i==4){
24 |         $i=1;
25 |         next;
26 |     }
27 |     else{
28 |         $i++;
29 |         next;
30 |     }
31 | }
32 | 
33 | foreach my $tail (sort{$a eq $b} keys %{$tails}){
34 |     print join("\t", $tail, $tails->{$tail})."\n";
35 | }
--------------------------------------------------------------------------------
/envs/summary.yaml:
--------------------------------------------------------------------------------
1 | name: summary
2 | channels:
3 |   - conda-forge
4 |   - bioconda
5 |   - defaults
6 |   - r
7 | dependencies:
8 |   - r-base =4.2.2
9 |   - r-base64enc =0.1_3
10 |   - r-bookdown =0.30
11 |   - r-digest =0.6.30
12 |   - r-evaluate =0.18
13 |   - r-glue =1.6.2
14 |   - r-highr =0.9
15 |   - r-htmltools =0.5.3
16 |   - r-jquerylib =0.1.4
17 |   - r-jsonlite =1.8.3
18 |   - r-knitr =1.40
19 |   - r-magrittr =2.0.3
20 |   - r-markdown =1.4
21 |   - r-mime =0.12
22 |   - r-rlang =1.0.6
23 |   - r-rmarkdown =2.18
24 |   - r-rmdformats =1.0.4
25 |   - r-stringi =1.7.8
26 |   - r-stringr =1.4.1
27 |   - r-tinytex =0.42
28 |   - r-xfun =0.35
29 |   - r-yaml =2.3.6
30 |   - readline =8.2
31 |   - sed =4.8
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS    ?=
7 | SPHINXBUILD   ?= sphinx-build
8 | SOURCEDIR     = .
9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/source/integrate.rst: -------------------------------------------------------------------------------- 1 | Integrating new tools/workflows 2 | ================================ 3 | 4 | In case new tools need to be integrated, please refer to similar tools already implemented or contact us in case nothing similar is available yet. Workflows should always be split in subworkflows that follow the same principle as existing subworkflows, ideally making them reusable for other workflows in the future. 5 | 6 | Tools should be easy to integrate, all that is needed is to write a tool and if applicable version specific **.smk** or **.nf** file describing input/output and command line calls as well as a fitting **conda environment yaml** file. Once these are available, they should already be usable and configurable via the **config.json** in the specific workflow section. 7 | -------------------------------------------------------------------------------- /scripts/Shells/NonUniqueBam_woPicard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | in=$1 4 | out=$2 5 | threads=$3 6 | bwa="${4:-}" 7 | 8 | samtools view -H ${in} | grep '@HD' > nhead 9 | samtools view -H ${in} | grep '@SQ' | sort -t$'\t' -k1,1 -k2,2V >> nhead 10 | samtools view -H ${in} | grep '@RG' >> nhead 11 | samtools view -H ${in} | grep '@PG' >> nhead 12 | 13 | 14 | if [[ "${in}" == *bwa* ]] || [[ -n "${bwa}" ]] 15 | then 16 | cat nhead <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@"| grep -e $'\t''XA:Z:' -e $'\t''SA:Z:') | samtools view --threads ${threads} -hb - > ${out} 17 | else 18 | cat nhead <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@" | grep -v -w -P "NH:i:0|NH:i:1|tp:A:P") | samtools view --threads ${threads} -hb - > ${out} 19 | fi 20 | 21 | rm -f nhead 22 | -------------------------------------------------------------------------------- /envs/monsda.yaml: -------------------------------------------------------------------------------- 1 | name: monsda 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - biopython=1.83 8 | - grep >=3.4 9 | - isort=5.13.2 10 | - monsda=1.2.8 11 | - natsort=8.4.0 12 | - nextflow=24.04.4 13 | - numpy=1.26.4 14 | - pandas=2.2.1 15 | - perl=5.34.0 16 | - pip>=24.0 17 | - python=3.12.2 18 | - pyyaml=6.0.1 19 | - scipy=1.12.0 20 | - snakemake=8.16.0 21 | - snakemake-executor-plugin-slurm=0.8.0 22 | - snakemake-executor-plugin-cluster-generic=1.0.9 23 | - snakemake-interface-common=1.17.2 24 | - snakemake-interface-executor-plugins=9.2.0 25 | - snakemake-interface-report-plugins=1.0.0 26 | - snakemake-interface-storage-plugins=3.2.3 27 | - snakemake-storage-plugin-s3=0.2.11 28 | - snakemake-storage-plugin-ftp=0.1.2 29 | - snakemake-storage-plugin-http=0.2.3 -------------------------------------------------------------------------------- /docs/source/contribute.rst: -------------------------------------------------------------------------------- 1 | Contribute 2 | ========== 3 | 4 | If you like this project, are missing features, 
want to contribute, or
5 | want to file bugs, please open a PR, leave an issue, or contact us directly.
6 | 
7 | To contribute new tools, feel free to adapt existing ones; there should
8 | be a number of examples available that cover implementation details
9 | for almost all sorts of standard tools. If you need to add new
10 | python/groovy functions for processing of options or parameters, add
11 | them to the corresponding file in the **lib** directory. New environments
12 | go into the **envs** directory, new subworkflows into the **workflows**
13 | directory. Do not forget to also extend the **template.json** and add some
14 | documentation before opening a pull request.
15 | 
16 | PRs always welcome.
17 | 
18 | 
19 | Contributors
20 | ------------
21 | 
22 | - Joerg Fallmann @jfallmann
23 | - Robin Goldmann @meisterL
--------------------------------------------------------------------------------
/scripts/Universal/sam2fastq.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | use strict;
3 | use warnings;
4 | use autodie;
5 | 
6 | my $in1 = shift; #bam file
7 | my $in2 = shift; #fastq
8 | 
9 | open IN1, "samtools view -h $in1 |";
10 | #open IN2, "< $in2" or die "can t open $in2\n";
11 | open(IN2, "gunzip -c $in2 |") or die "gunzip $in2: $!";
12 | 
13 | 
14 | my %hash =();
15 | while(<IN1>){
16 |     next if($_ =~ /^@/);
17 |     chomp $_;
18 | 
19 |     my @line = split(/\t/,$_);
20 |     my $ID = "@".$line[0];
21 |     $hash{$ID} = 0;
22 | }
23 | 
24 | 
25 | my @entry = ();
26 | 
27 | while(<IN2>){
28 |     chomp;
29 |     push @entry, $_;
30 | 
31 |     if (scalar(@entry) == 4) {
32 | 
33 |         my ($id, $seq, $plusLine, $qual) = @entry;
34 |         @entry = ();
35 | 
36 |         if(exists $hash{$id}){
37 |             print join ("\n", $id, $seq, $plusLine, $qual)."\n";
38 |         }
39 |     }
40 | }
--------------------------------------------------------------------------------
/workflows/summary.smk:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | logid = 'summary.smk '
3 | 
4 | outdir = "REPORTS/SUMMARY"
5 | 
6 | rule themall:
7 |     input: summary_all = expand("{outdir}/SUMMARY.html", outdir=outdir)
8 |     # summarys = expand("{dir}/SUMMARY.pdf", dir=get_summary_dirs(config))
9 | 
10 | rule make_rmd:
11 |     input: os.path.join(outdir,'summary.Rmd')
12 |     output: rules.themall.input.summary_all
13 |     # rules.themall.input.summarys
14 |     log: expand("LOGS/{outdir}/make_rmd.log", outdir=outdir)
15 |     conda: "summary.yaml"
16 |     params: outdir = outdir,
17 |             currentpath = os.path.join(os.path.dirname(os.path.realpath(os.path.abspath(inspect.getfile( inspect.currentframe() )) )),"..")
18 |     shell: "Rscript --vanilla -e \"rmarkdown::render('{input}',params=list(root='{params.currentpath}/'),output_file='{params.currentpath}/{params.outdir}/SUMMARY.html', quiet=TRUE)\" 2> {log}"
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 | 
5 | # Required
6 | version: 2
7 | 
8 | # Set the OS, Python version and other tools you might need
9 | 
10 | build:
11 |   os: ubuntu-22.04
12 |   tools:
13 |     python: "3.12"
14 |     # You can also specify other tool versions:
15 |     # nodejs: "20"
16 |     # rust: "1.70"
17 |     # golang: "1.20"
18 | 
19 | # Build documentation in the docs/ directory with Sphinx
20 | sphinx:
21 |   configuration: docs/conf.py
22 | 
# Build documentation with MkDocs 24 | #mkdocs: 25 | # configuration: mkdocs.yml 26 | 27 | # Optionally build your docs in additional formats such as PDF and ePub 28 | formats: [htmlzip, epub] 29 | 30 | # Optionally set the version of Python and requirements required to build your docs 31 | python: 32 | install: 33 | - requirements: docs/requirements.txt 34 | 35 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.nosetestsEnabled": false, 7 | "python.testing.pytestEnabled": true, 8 | "esbonio.server.enabled": true, 9 | "esbonio.sphinx.confDir": "${workspaceFolder}/docs", 10 | //"restructuredtext.linter.disabledLinters": [ 11 | // "doc8" 12 | //], 13 | "restructuredtext.linter.run": "onSave", 14 | "restructuredtext.linter.doc8.extraArgs": [ 15 | "--ignore D001" 16 | ], 17 | "cSpell.words": [ 18 | "COMPARABLES", 19 | "DEDUP", 20 | "MAXTHREADS", 21 | "subdir", 22 | "unstranded" 23 | ], 24 | "cSpell.enableFiletypes": [ 25 | "snakemake" 26 | ], 27 | "python.analysis.typeCheckingMode": "basic", 28 | "[python]": { 29 | "editor.defaultFormatter": "ms-python.black-formatter" 30 | }, 31 | "python.formatting.provider": "none" 32 | } 33 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [isort] 5 | multi_line_output=3 6 | include_trailing_comma=True 7 | force_grid_wrap=0 8 | use_parentheses=True 9 | line_length=110 10 | 11 | [flake8] 12 | ignore = E203, E266, E501, W503 13 | max-line-length = 110 14 | max-complexity = 18 15 | select = B,C,E,F,W,T4 16 | 17 | [tool:pytest] 18 | testpaths=test 19 | 20 | [versioneer] 21 | VCS = git 22 | style = pep440 23 | versionfile_source = MONSDA/_version.py 24 | versionfile_build = MONSDA/_version.py 25 | tag_prefix = v 26 | parentdir_prefix = MONSDA- 27 | 28 | [report] 29 | exclude_lines = 30 | # Have to re-enable the standard pragma 31 | pragma: no cover 32 | 33 | # Don't complain about missing debug-only code: 34 | def __repr__ 35 | if self\.debug 36 | 37 | # Don't complain if tests don't hit defensive assertion code: 38 | raise AssertionError 39 | raise NotImplementedError 
40 | 41 | # Don't complain if non-runnable code isn't run: 42 | if 0: 43 | if __name__ == .__main__.: 44 | -------------------------------------------------------------------------------- /workflows/fastqc_raw.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | process qc_raw{ 6 | conda "$QCENV"+".yaml" 7 | cpus THREADS 8 | cache 'lenient' 9 | //validExitStatus 0,1 10 | 11 | publishDir "${workflow.workDir}/../" , mode: 'link', 12 | saveAs: {filename -> 13 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 14 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 15 | else null 16 | } 17 | 18 | input: 19 | path read 20 | 21 | output: 22 | path "*.{zip,html}", emit: fastqc_results 23 | 24 | script: 25 | """ 26 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 27 | """ 28 | } 29 | 30 | workflow QC_RAW{ 31 | take: 32 | collection 33 | 34 | main: 35 | 36 | qc_raw(samples_ch) 37 | 38 | emit: 39 | qc = qc_raw.out.fastqc_results 40 | } 41 | -------------------------------------------------------------------------------- /scripts/Shells/UniqueBam_woPicard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | in=$1 4 | out=$2 5 | threads=$3 6 | specialmappers="${4:-}" 7 | 8 | samtools view -H ${in} | grep '@HD' > ${in}_head 9 | samtools view -H ${in} | grep '@SQ' | sort -t$'\t' -k1,1 -k2,2V >> ${in}_head 10 | samtools view -H ${in} | grep '@RG' >> ${in}_head 11 | samtools view -H ${in} | grep '@PG' >> ${in}_head 12 | 13 | 14 | if [[ "$1" == *bwa* ]] || [[ "$specialmappers" == *bwa* ]] 15 | then 16 | cat ${in}_head <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@"| grep -v -e $'\t''XA:Z:' -e $'\t''SA:Z:') | samtools view --threads ${threads} -hb - > ${out} 17 | elif [[ "$1" == *minimap* ]] || [[ "$specialmappers" == *minimap* ]] 18 | then 19 | cat ${in}_head <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@" | perl -wlane 'print if $F[4] >=60') | samtools view --threads ${threads} -hb - > ${out} 20 | else 21 | cat ${in}_head <(samtools view --threads ${threads} -F 4 ${in} | grep -v "^@" | grep -w -P "NH:i:1|tp:A:P") | samtools view --threads ${threads} -hb - > ${out} 22 | fi 23 | 24 | rm -f ${in}_head 25 | -------------------------------------------------------------------------------- /workflows/multiqc.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_MULTI') ?: '' 4 | 5 | process mqc{ 6 | conda "$QCENV"+".yaml" 7 | cpus THREADS 8 | cache 'lenient' 9 | //validExitStatus 0,1 10 | 11 | publishDir "${workflow.workDir}/../" , mode: 'link', 12 | saveAs: {filename -> 13 | if (filename.indexOf("zip") > 0) "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 14 | else if (filename.indexOf("html") > 0) "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 15 | else "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getName()}" 16 | } 17 | 18 | input: 19 | path others 20 | //path samples 21 | 22 | output: 23 | path "*.zip", emit: mqc 24 | path "*.html", emit: html 25 | 26 | script: 27 | """ 28 | touch $others; export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f --exclude picard 
--exclude gatk -k json -z -o \${PWD} . 29 | """ 30 | } 31 | 32 | workflow MULTIQC{ 33 | take: 34 | otherqcs 35 | 36 | main: 37 | 38 | mqc(otherqcs.collect()) 39 | 40 | emit: 41 | mqcres = mqc.out.mqc 42 | } 43 | -------------------------------------------------------------------------------- /workflows/picard_dedup.smk: -------------------------------------------------------------------------------- 1 | DEDUPBIN, DEDUPENV = env_bin_from_config(config, 'DEDUP') 2 | 3 | rule dedupbam: 4 | input: bam = "MAPPED/{combo}/{file}_mapped_{type}.bam" 5 | output: bam = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam", category="DEDUP"), 6 | bai = report("MAPPED/{combo}/{file}_mapped_{type}_dedup.bam.bai", category="DEDUP"), 7 | met = report("MAPPED/{combo}/{file}_mapped_{type}_dedup_metrics.txt", category="DEDUP"), 8 | td = temp(directory("TMP/UMIDD/{combo}/{file}_{type}")) 9 | log: "LOGS/{combo}/{file}_{type}/dedupbam.log" 10 | conda: ""+DEDUPENV+".yaml" 11 | threads: 1 12 | priority: 0 # This should be done after all mapping is done 13 | params: jpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('JAVA', ""), 14 | dpara = lambda wildcards: tool_params(wildcards.file, None, config, "DEDUP", DEDUPENV)['OPTIONS'].get('DEDUP', ""), 15 | dedup = DEDUPBIN 16 | shell: "mkdir -p {output.td} && {params.dedup} {params.jpara} MarkDuplicates --REMOVE_DUPLICATES true --ASSUME_SORT_ORDER coordinate --TMP_DIR {output.td} --INPUT {input.bam} --OUTPUT {output.bam} --METRICS_FILE {output.met} {params.dpara} 2> {log} && samtools index {output.bam} 2>> {log}" -------------------------------------------------------------------------------- /workflows/dorado.smk: -------------------------------------------------------------------------------- 1 | CALLERBIN, CALLERENV = env_bin_from_config(config,'BASECALL') 2 | 3 | wildcard_constraints: 4 | rawfile = '|'.join(SAMPLES) 5 | 6 | rule themall: 7 | input: expand("FASTQ/{rawfile}.fastq.gz", rawfile = SAMPLES) 8 | 9 | rule call_base: 10 | input: p5 = "RAW/{rawfile}.pod5" 11 | output: fq = "FASTQ/{rawfile}.fastq.gz", 12 | bam = temp("FASTQ/{rawfile}.bam") 13 | log: "LOGS/BASECALL/{rawfile}_dorado.log" 14 | conda: ""+CALLERENV+".yaml" 15 | threads: MAXTHREAD 16 | params: caller = CALLERBIN, 17 | cpara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'BASECALL', CALLERENV)['OPTIONS'].get('BASECALL', ""), 18 | cmodel = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'BASECALL', CALLERENV)['OPTIONS'].get('MODEL', ""), 19 | p5dir = lambda wildcards, input: os.path.dirname(input.p5), 20 | p5file = lambda wildcards, input: os.path.basename(input.p5), 21 | fqdir = lambda wildcards, output: os.path.dirname(output.fq) 22 | shell: "{params.caller} download --directory {params.p5dir} --model {params.cmodel} &> {log} && {params.caller} basecaller {params.cpara} {params.p5dir}/{params.cmodel} {params.p5dir}/ 2>> {log} 1> {output.bam} && samtools view -h {output.bam}|samtools fastq -n - | pigz > {output.fq}" -------------------------------------------------------------------------------- /docs/source/cluster.rst: -------------------------------------------------------------------------------- 1 | .. 
_slurm: 2 | 3 | ============ 4 | Run on Slurm 5 | ============ 6 | 7 | Snakemake 8 | ========= 9 | 10 | You can either use the slurm profile adapted from 11 | Snakemake-Profiles_ that comes with this repository, or go 12 | through the process of manually creating one, either using the cookiecutter example in the 13 | **Snakemake-Profiles** repository or on your own. You can also adapt the example that comes with this repository and execute 14 | 15 | .. _Snakemake-Profiles: https://github.com/Snakemake-Profiles/slurm 16 | 17 | .. code-block:: 18 | 19 | monsda -j ${cpus} --configfile ${config.json} --directory ${PWD} --profile ${path_to_slurm_profile} 20 | 21 | 22 | Further adaptions like grouping of jobs and advanced configs for rule 23 | based performance increase will be tackled in future releases of **MONSDA**. 24 | 25 | Nextflow 26 | ======== 27 | 28 | Cluster config for Nextflow follows the description Nextflow-Executors_ and Nextflow-Profiles_. To use **SLURM** as executor you can adapt the profile that comes with this repository and simply append 29 | 30 | .. code-block:: 31 | 32 | export NXF_EXECUTOR=slurm 33 | 34 | to the call to **MONSDA**. 35 | 36 | .. _Nextflow-Executors: https://www.Nextflow.io/docs/latest/executor.html 37 | .. _Nextflow-Profiles: https://www.Nextflow.io/docs/latest/config.html#config-profiles -------------------------------------------------------------------------------- /configs/tutorial_quick.json: -------------------------------------------------------------------------------- 1 | { 2 | "WORKFLOWS": "FETCH,MAPPING", 3 | "BINS": "", 4 | "MAXTHREADS": "4", 5 | "VERSION": "1.2.8", 6 | "SETTINGS": { 7 | "SIMPLE": { 8 | "SAMPLES": [ 9 | "SRR16324019" 10 | ], 11 | "SEQUENCING": "paired", 12 | "REFERENCE": "GENOMES/Ecoli/ecoli.fa.gz", 13 | "ANNOTATION": { 14 | "GFF": "GENOMES/Ecoli/ecoli.gff.gz", 15 | "GTF": "GENOMES/Ecoli/ecoli.gtf.gz" 16 | } 17 | } 18 | }, 19 | "FETCH": { 20 | "TOOLS" : 21 | { 22 | "sra" : "fasterq-dump" 23 | }, 24 | "SIMPLE": { 25 | "sra": { 26 | "OPTIONS": 27 | { 28 | "PREFETCH": "${HOME}/.ncbi/user-settings.mkfg", 29 | "DOWNLOAD": "" 30 | } 31 | } 32 | } 33 | }, 34 | "MAPPING": { 35 | "TOOLS": { 36 | "star": "STAR" 37 | }, 38 | "SIMPLE": { 39 | "star": { 40 | "OPTIONS": { 41 | "INDEX": "--genomeSAindexNbases 8", 42 | "MAP": "--outSAMprimaryFlag AllBestScore --outFilterMultimapNmax 20", 43 | "EXTENSION": "" 44 | } 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /workflows/summary.nf: -------------------------------------------------------------------------------- 1 | process make_rmd{ 2 | conda "summary.yaml" 3 | cpus THREADS 4 | cache 'lenient' 5 | //validExitStatus 0,1 6 | 7 | publishDir "${workflow.workDir}/../" , mode: 'link', 8 | saveAs: {filename -> 9 | if (filename == 'SUMMARY.html') "REPORTS/SUMMARY/SUMMARY.html" 10 | else if (filename.indexOf("log") > 0) "LOGS/REPORTS/SUMMARY/make_rmd.log" 11 | } 12 | 13 | input: 14 | path figs 15 | path tables 16 | 17 | output: 18 | path "*.html", emit: report 19 | path "log", emit: log 20 | 21 | script: 22 | """ 23 | ln -f \"${projectDir}/../REPORTS/SUMMARY/summary.Rmd\" .; 24 | touch log; 25 | Rscript --vanilla -e \"rmarkdown::render('summary.Rmd', params=list(root='.'), output_file='SUMMARY.html', quiet=TRUE)\" 2> log 26 | """ 27 | } 28 | 29 | 30 | workflow SUMMARY{ 31 | take: collection 32 | 33 | main: 34 | 35 | png_ch = Channel.fromPath("${projectDir}/../D{E,EU,AS,TU}/**/Figures/*.{png,pdf}") 36 | tab_ch = 
Channel.fromPath("${projectDir}/../D{E,EU,AS,TU}/**/Tables/*.tsv.gz") 37 | //png_ch.subscribe { println "PNG: $it" } 38 | //tab_ch.subscribe { println "TABLE: $it" } 39 | 40 | make_rmd(png_ch.collect(), tab_ch.collect()) 41 | 42 | emit: 43 | rmds = make_rmd.out.report 44 | } 45 | 46 | workflow{ 47 | SUMMARY(dummy) 48 | } -------------------------------------------------------------------------------- /workflows/premultiqc.nf: -------------------------------------------------------------------------------- 1 | 2 | QCENV=get_always('QCENV') 3 | QCBIN=get_always('QCBIN') 4 | QCPARAMS = get_always('fastqc_params_MULTI') ?: '' 5 | 6 | process collect_multi{ 7 | input: 8 | path check 9 | 10 | output: 11 | path "collect.txt", emit: done 12 | 13 | script: 14 | """ 15 | echo "$check Collection successful!" > collect.txt 16 | """ 17 | } 18 | 19 | 20 | process premultiqc{ 21 | conda "$QCENV"+".yaml" 22 | cpus THREADS 23 | cache 'lenient' 24 | //validExitStatus 0,1 25 | 26 | publishDir "${workflow.workDir}/../" , mode: 'link', 27 | saveAs: {filename -> 28 | if (filename.indexOf("zip") > 0) "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 29 | else if (filename.indexOf("html") > 0) "QC/Multi/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 30 | else null 31 | } 32 | 33 | input: 34 | path samples 35 | 36 | output: 37 | path "*.{zip,html}", emit: multiqc_results 38 | 39 | script: 40 | """ 41 | export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f --exclude picard --exclude gatk -k json -z -s 42 | """ 43 | } 44 | 45 | workflow PREMULTIQC{ 46 | take: 47 | otherqcs 48 | 49 | main: 50 | 51 | //SAMPLE CHANNELS 52 | multiqc(otherqcs.collect()) 53 | 54 | emit: 55 | mqcres = premultiqc.out.multiqc_results 56 | } 57 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | 4 | Installation 5 | ============ 6 | 7 | Install MONSDA via **conda** or **pip** 8 | ------------------------------------------- 9 | 10 | To install via **conda/mamba** in an environment named 'monsda' simply run 11 | 12 | .. code-block:: 13 | 14 | conda create -n monsda -c bioconda -c conda-forge monsda 15 | 16 | 17 | To install via **pip** you first need to create the **MONSDA** environment as found in the **envs** directory of this repository (simply clone with git clone) like so: 18 | 19 | .. code-block:: 20 | 21 | conda env create -n monsda -f MONSDA/envs/monsda.yaml 22 | 23 | 24 | The **envs** directory holds all the environments needed to run the pipelines in the **workflows** directory, these will be installed automatically alongside **MONSDA**. 25 | 26 | For that activate the **MONSDA** environment and run **pip** 27 | 28 | .. code-block:: 29 | 30 | conda activate monsda 31 | pip install MONSDA 32 | 33 | 34 | Install from source 35 | ------------------- 36 | 37 | Simply clone this repository with 38 | 39 | .. code-block:: 40 | 41 | git clone git@github.com:jfallmann/MONSDA.git 42 | 43 | You can then install dependencies as described for **pip** installation and manually run **setup.py**. 44 | 45 | Be aware that **MONSDA** is *version dependent*, so config files can only be run with the **specified** version of **MONSDA** in order to guarantee reproducibility by conserving dependencies and environments. 
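For example, to re-run a config written for a given release (the ``VERSION`` field of the config, e.g. ``1.2.8`` as in ``configs/tutorial_quick.json``), you can pin the installation to exactly that release and check it before starting a run. This is only an illustrative sketch; the version number below is taken from the tutorial config and needs to be replaced by whatever your own config file states:

.. code-block::

    # pip: install exactly the release the config file was written for
    pip install MONSDA==1.2.8
    # or with conda/mamba
    conda create -n monsda -c bioconda -c conda-forge monsda=1.2.8
    # verify that the installed package matches the VERSION field of the config
    python -c "import MONSDA; print(MONSDA.__version__)"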
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "MONSDA" 3 | description = "MONSDA, Modular Organizer of Nextflow and Snakemake driven hts Data Analysis" 4 | readme = "README.md" 5 | license = { file = "LICENSE" } 6 | authors = [{name = "Joerg Fallmann", email = "fallmann.joerg@gmail.com"}] 7 | maintainers = [{name = "Joerg Fallmann", email = "fallmann.joerg@gmail.com"}] 8 | dynamic = ["scripts", "dependencies", "requires-python", "version"] 9 | 10 | 11 | [project.urls] 12 | Homepage = "https://github.com/jfallmann/MONSDA" 13 | Documentation = "https://monsda.readthedocs.io/en/latest" 14 | Repository = "https://github.com/jfallmann/MONSDA" 15 | Issues = "https://github.com/jfallmann/MONSDA/issues" 16 | 17 | [build-system] 18 | build-backend = "setuptools.build_meta" 19 | requires = [ 20 | "setuptools>=42", 21 | 'tomli; python_version >= "3.12.0"', 22 | "biopython>=1.83", 23 | "snakemake>=8.16.0", 24 | "black>=21.5b2", 25 | "flake8>=3.8.3", 26 | "isort>=5.13.2", 27 | "sphinx>=4.1.0", 28 | "versioneer>=0.20", 29 | ] 30 | 31 | [tool.versioneer] 32 | VCS = "git" 33 | style = "pep440" 34 | tag_prefix = "v" 35 | versionfile_build = "MONSDA/_version.py" 36 | versionfile_source = "MONSDA/_version.py" 37 | 38 | [tool.codespell] 39 | # Ref: https://github.com/codespell-project/codespell#using-a-config-file 40 | skip = '.git,*.pdf,*.svg,versioneer.py,*.css,test_*' 41 | check-hidden = true 42 | ignore-regex = '^\s*"image/\S+": ".*|\b[Mm]anuel[. ][Hh]oltgrewe\b' 43 | ignore-words-list = 'testin' 44 | 45 | -------------------------------------------------------------------------------- /workflows/simulatetrim.smk: -------------------------------------------------------------------------------- 1 | if paired == 'paired' or paired == 'singlecell': 2 | rule simulate_trim: 3 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 4 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 5 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 6 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 7 | threads: 1 8 | params: filetolink = lambda w, input: "{r}".format(r=os.path.abspath(input.r1)), 9 | filetolink2 = lambda w, input: "{r}".format(r=os.path.abspath(input.r2)) 10 | shell: "ln -s {params.filetolink} {output.r1} && ln -s {params.filetolink2} {output.r2}" 11 | 12 | else: 13 | rule simulate_trim: 14 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 15 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 16 | threads: 1 17 | params: filetolink = lambda w, input: "{r}".format(r=os.path.abspath(input.r1)) 18 | shell: "ln -s {params.filetolink} {output.r1}" 19 | -------------------------------------------------------------------------------- /docs/source/runsmk.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | Start a pipline run 3 | =================== 4 | 5 | 6 | Snakemake 7 | --------- 8 | 9 | Activate the 
**MONSDA** conda environment and run 10 | 11 | 12 | .. code-block:: 13 | 14 | monsda --help 15 | 16 | 17 | to see the help and available options that will be passed through to **Snakemake**. 18 | 19 | To start a job with **Snakemake**, which is the default, run 20 | 21 | .. code-block:: 22 | 23 | monsda -j NUMBER_OF_CORES -c YOUR_CONFIG.json --directory ${PWD} 24 | 25 | 26 | or add additional arguments for **Snakemake** as you see fit. 27 | **Snakemake** currently defaults to mamba as its conda frontend; please be aware that for this to work you should follow the recommendations at MAMBA_. However, with conda-libmamba-solver installed, using the conda frontend can provide an even better and more stable experience. We currently recommend setting a fixed directory to store environments (here conda_envs) and running the conda frontend. 28 | 29 | .. _MAMBA: https://mamba.readthedocs.io/en/latest/mamba-installation.html 30 | 31 | .. code-block:: 32 | 33 | --conda-frontend conda --conda-prefix path_to_conda_envs 34 | 35 | 36 | Nextflow 37 | -------- 38 | 39 | To run **MONSDA** in **Nextflow** mode, just add '--nextflow' 40 | 41 | .. code-block:: 42 | 43 | monsda --nextflow -j NUMBER_OF_CORES -c YOUR_CONFIG.json --directory ${PWD} 44 | 45 | 46 | As with **Snakemake**, additional arguments for **Nextflow** can be added and will be passed through. 47 | -------------------------------------------------------------------------------- /workflows/dorado.nf: -------------------------------------------------------------------------------- 1 | CALLERENV = get_always('BASECALLENV') 2 | CALLERBIN = get_always('BASECALLBIN') 3 | 4 | CALLERPARAMS = get_always('dorado_params_CALLER') ?: '' 5 | MODELPARAMS = get_always('dorado_params_MODEL') ?: '' 6 | 7 | //CALLERS PROCESSES 8 | 9 | process dorado{ 10 | conda "$CALLERENV"+".yaml" 11 | cpus THREADS 12 | cache 'lenient' 13 | //validExitStatus 0,1 14 | 15 | publishDir "${workflow.workDir}/../" , mode: 'link', 16 | saveAs: {filename -> 17 | if (filename.indexOf(".fastq.gz") > 0) "FASTQ/${CONDITION}/${file(filename).getName()}" 18 | else if (filename.indexOf(".log") > 0) "LOGS/BASECALL/${CONDITION}/${file(filename).getName()}" 19 | } 20 | 21 | input: 22 | path f5 23 | 24 | output: 25 | path "*.fastq.gz", emit: fastq 26 | path "*.log", emit: log 27 | 28 | script: 29 | fn = file(f5).getSimpleName() 30 | oc = fn+".fastq.gz" 31 | ol = fn+".log" 32 | sortmem = '30%' 33 | 34 | """ 35 | $CALLERBIN download --model $MODELPARAMS &> $ol && $CALLERBIN basecaller $CALLERPARAMS $MODELPARAMS .
2>> $ol 1> tmp.bam && samtools view -h tmp.bam|samtools fastq -n - | pigz 1> $oc 2>> $ol && rm -rf tmp.bam 36 | """ 37 | } 38 | 39 | workflow BASECALL{ 40 | take: collection 41 | 42 | main: 43 | 44 | P5SAMPLES = SAMPLES.collect{ 45 | element -> return "${workflow.workDir}/../RAW/"+element+"*.pod5" 46 | } 47 | 48 | p5samples_ch = Channel.fromPath(P5SAMPLES.sort()) 49 | 50 | dorado(p5samples_ch.collate(1)) 51 | 52 | emit: 53 | fastq = dorado.out.fastq 54 | logs = dorado.out.log 55 | } -------------------------------------------------------------------------------- /workflows/premultiqc.smk: -------------------------------------------------------------------------------- 1 | rule qcall: 2 | input: expand("QC/Multi/{condition}/multiqc_report.html", condition=str.join(os.sep, conditiononly(SAMPLES[0], config))) 3 | 4 | if paired == 'paired': 5 | rule multiqc: 6 | input: expand("QC/{rawfile}_{read}_fastqc.zip", rawfile=list(SAMPLES), read=['R1','R2']), 7 | output: html = report("QC/Multi/{condition}/multiqc_report.html", category="QC"), 8 | tmp = temp("QC/Multi/{condition}/tmp"), 9 | lst = "QC/Multi/{condition}/qclist.txt" 10 | log: "LOGS/{condition}/multiqc.log" 11 | conda: "qc.yaml" 12 | threads: 1 13 | shell: "OUT=$(dirname {output.html}); for i in {input};do echo $(dirname \"${{i}}\") >> {output.tmp};done; cat {output.tmp} |sort -u > {output.lst};export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f --exclude picard --exclude gatk -k json -z -s -o $OUT -l {output.lst} 2> {log}" 14 | 15 | else: 16 | rule multiqc: 17 | input: expand("QC/{rawfile}_fastqc.zip", rawfile=list(SAMPLES)), 18 | output: html = report("QC/Multi/{condition}/multiqc_report.html", category="QC"), 19 | tmp = temp("QC/Multi/{condition}/tmp"), 20 | lst = "QC/Multi/{condition}/qclist.txt" 21 | log: "LOGS/{condition}/multiqc.log" 22 | conda: "qc.yaml" 23 | threads: 1 24 | shell: "OUT=$(dirname {output.html}); for i in {input};do echo $(dirname \"${{i}}\") >> {output.tmp};done; cat {output.tmp} |sort -u > {output.lst};export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f --exclude picard --exclude gatk -k json -z -s -o $OUT -l {output.lst} 2> {log}" 25 | -------------------------------------------------------------------------------- /scripts/Shells/MergeGeneExpression_Cufflinks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/env bash 2 | COND=$1 3 | SOURCE=$2 4 | PATTERN=$3 5 | SAMPLES=$4 6 | 7 | #cut -d$'\t' -f1 L12270_09_WT_d4_1_2_IN_R1_mapped_sorted.transcript_counts > tmp_counts 8 | 9 | for i in *$COND\_*$SOURCE\_*$PATTERN\_unique/genes.fpkm_tracking;do 10 | echo $i 11 | tail -n+2 $i|cut -d$'\t' -f1,5,10 >> GENECOUNTS_$COND\_$SOURCE\_tmp 12 | done 13 | 14 | #env sa=$SAMPLES perl -lan -F'\t' -e 'BEGIN{%exp=()}; $exp{$F[0]}+=$F[1]; END{foreach $key(keys %exp){print $key,"\t",$exp{$key}/$ENV{sa}}}' COUNTS_$COND\_$SOURCE\_tmp |sort -k1,1d > COUNTS_$COND\_$SOURCE.transcript.fpkm 15 | 16 | #rm -f COUNTS_$COND\_$SOURCE\_tmp 17 | 18 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sorted_unique.counts) > Compare_D_L_uni 19 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sortedall) > Compare_D_L 20 | 21 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d4 IP R1 3 22 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d0 IP R1 3 23 | #bash 
../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d4 IP R1 3 24 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d0 IP R1 3 25 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d0 IN R1 3 26 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d4 IN R1 3 27 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d4 IN R1 3 28 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d0 IN R1 3 29 | -------------------------------------------------------------------------------- /scripts/lib/_lib.R: -------------------------------------------------------------------------------- 1 | ## FUNCS 2 | get_gene_name <- function(id, df) { 3 | if (!"gene_id" %in% colnames(df)) { 4 | message("WARNING: gene_id not found as colname, will be replaced by first match of colname with ID") 5 | colnames(df)[grepl("id$", names(df), ignore.case = TRUE)][1] <- "gene_id" 6 | } 7 | if (!"gene_name" %in% colnames(df)) { 8 | message("WARNING: gene_name not found as colname, will be replaced by gene column, please make sure the gtf file is in the correct format") 9 | df$gene_name <- df$gene 10 | } 11 | name_list <- df$gene_name[df["type"] == "gene" & df["gene_id"] == id] 12 | if (length(unique(name_list)) == 1) { 13 | return(name_list[1]) 14 | } else { 15 | message(paste("WARNING: ambigous gene id: ", id)) 16 | return(paste(unique(name_list), sep = "|")) 17 | } 18 | } 19 | 20 | 21 | get_exon_name <- function(id, df) { 22 | if (!"gene_id" %in% colnames(df)) { 23 | message("WARNING: gene_id not found as colname, will be replaced by first match of colname with ID") 24 | colnames(df)[grepl("id$", names(df), ignore.case = TRUE)][1] <- "gene_id" 25 | } 26 | if (!"gene_name" %in% colnames(df)) { 27 | message("WARNING: gene_name not found as colname, will be replaced by gene column, please make sure the gtf file is in the correct format") 28 | df$gene_name <- df$gene 29 | } 30 | name_list <- df$gene_name[df["type"] == "exon" & df["gene_id"] == id] 31 | if (length(unique(name_list)) == 1) { 32 | return(name_list[1]) 33 | } else { 34 | message(paste("WARNING: ambigous gene id: ", id)) 35 | return(paste(unique(name_list), sep = "|")) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /envs/edger_DAS.yaml: -------------------------------------------------------------------------------- 1 | name: edger_DAS 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - bioconductor-biobase =2.46.0 8 | - bioconductor-biocgenerics =0.32.0 9 | - bioconductor-biocparallel =1.20.0 10 | - bioconductor-biostrings =2.54.0 11 | - bioconductor-delayedarray =0.12.0 12 | - bioconductor-edger =3.28.0 13 | - bioconductor-genomeinfodb =1.22.0 14 | - bioconductor-genomeinfodbdata =1.2.2 15 | - bioconductor-genomicalignments =1.22.0 16 | - bioconductor-genomicranges =1.38.0 17 | - bioconductor-iranges =2.20.0 18 | - bioconductor-limma =3.42.0 19 | - bioconductor-rhtslib =1.18.0 20 | - bioconductor-rsamtools =2.2.0 21 | - bioconductor-rtracklayer =1.46.0 22 | - bioconductor-s4vectors =0.24.0 23 | - bioconductor-summarizedexperiment =1.16.0 24 | - bioconductor-xvector =0.26.0 25 | - bioconductor-zlibbioc =1.32.0 26 | - r-assertthat =0.2.1 27 | - r-base =3.6.2 28 | - r-bh =1.69.0_1 29 | - r-bitops =1.0_6 30 | - r-cli =1.1.0 31 | - r-crayon =1.3.4 32 | - r-dplyr =0.8.0.1 33 | - r-fansi =0.4.0 34 | - r-formatr =1.7 35 | - r-futile.logger =1.4.3 36 | - 
r-futile.options =1.0.1 37 | - r-glue =1.3.1 38 | - r-lambda.r =1.2.4 39 | - r-lattice =0.20_38 40 | - r-locfit =1.5_9.1 41 | - r-magrittr =1.5 42 | - r-matrix =1.2_18 43 | - r-matrixstats =0.57.0 44 | - r-pillar =1.3.1 45 | - r-pkgconfig =2.0.2 46 | - r-plogr =0.2.0 47 | - r-purrr =0.3.2 48 | - r-r6 =2.4.0 49 | - r-rcpp =1.0.3 50 | - r-rcurl =1.98_1.1 51 | - r-rlang =0.3.4 52 | - r-snow =0.4_3 53 | - r-statmod =1.4.33 54 | - r-tibble =2.1.1 55 | - r-tidyselect =0.2.5 56 | - r-utf8 =1.1.4 57 | - r-xml =3.99_0.3 58 | - readline =8.2 59 | - sed =4.7 -------------------------------------------------------------------------------- /envs/edger_DEU.yaml: -------------------------------------------------------------------------------- 1 | name: edger_DEU 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - bioconductor-biobase =2.46.0 8 | - bioconductor-biocgenerics =0.32.0 9 | - bioconductor-biocparallel =1.20.0 10 | - bioconductor-biostrings =2.54.0 11 | - bioconductor-delayedarray =0.12.0 12 | - bioconductor-edger =3.28.0 13 | - bioconductor-genomeinfodb =1.22.0 14 | - bioconductor-genomeinfodbdata =1.2.2 15 | - bioconductor-genomicalignments =1.22.0 16 | - bioconductor-genomicranges =1.38.0 17 | - bioconductor-iranges =2.20.0 18 | - bioconductor-limma =3.42.0 19 | - bioconductor-rhtslib =1.18.0 20 | - bioconductor-rsamtools =2.2.0 21 | - bioconductor-rtracklayer =1.46.0 22 | - bioconductor-s4vectors =0.24.0 23 | - bioconductor-summarizedexperiment =1.16.0 24 | - bioconductor-xvector =0.26.0 25 | - bioconductor-zlibbioc =1.32.0 26 | - r-assertthat =0.2.1 27 | - r-base =3.6.2 28 | - r-bh =1.69.0_1 29 | - r-bitops =1.0_6 30 | - r-cli =1.1.0 31 | - r-crayon =1.3.4 32 | - r-dplyr =0.8.0.1 33 | - r-fansi =0.4.0 34 | - r-formatr =1.7 35 | - r-futile.logger =1.4.3 36 | - r-futile.options =1.0.1 37 | - r-glue =1.3.1 38 | - r-lambda.r =1.2.4 39 | - r-lattice =0.20_38 40 | - r-locfit =1.5_9.1 41 | - r-magrittr =1.5 42 | - r-matrix =1.2_18 43 | - r-matrixstats =0.57.0 44 | - r-pillar =1.3.1 45 | - r-pkgconfig =2.0.2 46 | - r-plogr =0.2.0 47 | - r-purrr =0.3.2 48 | - r-r6 =2.4.0 49 | - r-rcpp =1.0.3 50 | - r-rcurl =1.98_1.1 51 | - r-rlang =0.3.4 52 | - r-snow =0.4_3 53 | - r-statmod =1.4.33 54 | - r-tibble =2.1.1 55 | - r-tidyselect =0.2.5 56 | - r-utf8 =1.1.4 57 | - r-xml =3.99_0.3 58 | - readline =8.2 59 | - sed =4.7 -------------------------------------------------------------------------------- /workflows/guppy.smk: -------------------------------------------------------------------------------- 1 | CALLERBIN, CALLERENV = env_bin_from_config(config,'BASECALL') 2 | 3 | wildcard_constraints: 4 | rawfile = '|'.join(SAMPLES) 5 | 6 | rule themall: 7 | input: expand("FASTQ/{rawfile}.fastq.gz", rawfile = SAMPLES) 8 | 9 | rule call_base: 10 | input: f5 = "RAW/{rawfile}.fast5" 11 | output: fq = "FASTQ/{rawfile}.fastq.gz", 12 | summary = "FASTQ/{rawfile}_summary.txt", 13 | telemetry = "FASTQ/{rawfile}_telemetry.js" 14 | log: "LOGS/BASECALL/{rawfile}_guppy.log" 15 | conda: ""+CALLERENV+".yaml" 16 | threads: MAXTHREAD 17 | params: caller = CALLERBIN, 18 | cpara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'BASECALL', CALLERENV)['OPTIONS'].get('BASECALL', ""), 19 | cmodel = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'BASECALL', CALLERENV)['OPTIONS'].get('MODEL', ""), 20 | f5dir = lambda wildcards, input: os.path.dirname(input.f5), 21 | f5file = lambda wildcards, input: os.path.basename(input.f5), 22 | fqdir = lambda wildcards, output: 
os.path.dirname(output.fq) 23 | shell: " echo \"{params.f5file}\" > {params.f5dir}/f5list && {params.caller} {params.cpara} -c {params.cmodel} --compress_fastq -i {params.f5dir} --input_file_list {params.f5dir}/f5list -s {params.f5dir}/BASECALL 2> {log} && cat {params.f5dir}/BASECALL/pass/fastq_runid_*.fastq.gz > {output.fq} && rm -f {params.f5dir}/BASECALL/pass/fastq_runid_*.fastq.gz && cat {params.f5dir}/BASECALL/*.log >> {log} && rm -f {params.f5dir}/BASECALL/*.log && mv -f {params.f5dir}/BASECALL/sequencing_summary.txt {output.summary} && mv -f {params.f5dir}/BASECALL/sequencing_telemetry.js {output.telemetry} && rm -f {params.f5dir}/f5list" -------------------------------------------------------------------------------- /scripts/Shells/MergeExpression_RNAcounter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/env bash 2 | COND=$1 3 | SOURCE=$2 4 | PATTERN=$3 5 | SAMPLES=$4 6 | 7 | cut -d$'\t' -f1 L12270_09_WT_d4_1_2_IN_R1_mapped_sorted.transcript_counts > tmp_counts 8 | 9 | for i in *$COND\_*$SOURCE\_*$PATTERN\.transcript_counts;do 10 | echo $i 11 | join -1 1 -2 1 <(sort -k1,1d tmp_counts) <(sort -k1,1d $i|cut -d$'\t' -f1,2,3) > COUNTS_$COND\_$SOURCE && cp -f COUNTS_$COND\_$SOURCE tmp_counts 12 | done 13 | rm -f tmp_counts 14 | 15 | awk 'BEGIN{FS=" ";OFS=FS}{s=1;t=0;for(i=2;i<=NF;i+=2){if($i >=1){t+=$i;s++}} print $1,t/s;t=0;s=1}' COUNTS_$COND\_$SOURCE |sort -k1,1d > $SOURCE\_$COND\_$PATTERN\.transcript.counts 16 | 17 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sorted_unique.counts) > Compare_D_L_uni 18 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sortedall) > Compare_D_L 19 | 20 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh KO_d4 IP mapped_sorted 3 21 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh KO_d0 IP mapped_sorted 3 22 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh WT_d4 IP mapped_sorted 3 23 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh WT_d0 IP mapped_sorted 3 24 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh WT_d0 IN mapped_sorted 3 25 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh WT_d4 IN mapped_sorted 3 26 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh KO_d4 IN mapped_sorted 3 27 | #bash ../../../Workflows/scripts/Shells/MergeExpression_RNAcounter.sh KO_d0 IN mapped_sorted 3 28 | -------------------------------------------------------------------------------- /envs/perl.yaml: -------------------------------------------------------------------------------- 1 | name: perl 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - grep >=2.14 9 | - perl >=5.26.2 10 | - perl-app-cpanminus >=1.7044 11 | - perl-autoloader >=5.74 12 | - perl-bioperl >=1.6.924 13 | - perl-capture-tiny >=0.48 14 | - perl-carp >=1.38 15 | - perl-constant >=1.33 16 | - perl-data-dumper >=2.173 17 | - perl-dbi >=1.642 18 | - perl-dynaloader >=1.25 19 | - perl-encode >=2.88 20 | - perl-exporter >=5.72 21 | - perl-extutils-cppguess >=0.12 22 | - perl-extutils-makemaker >=7.36 23 | - perl-file-find >=1.27 24 | - perl-file-path >=2.16 25 | - perl-file-slurp >=9999.27 26 | - perl-file-spec >=3.48_01 27 | - perl-file-temp >=0.2304 28 | - perl-findbin-real >=1.05 29 | - perl-getopt-long >=2.50 30 | - 
perl-ipc-cmd >=1.02 31 | - perl-list-util >=1.38 32 | - perl-locale-maketext-simple >=0.21 33 | - perl-math-cdf >=0.1 34 | - perl-math-round >=0.07 35 | - perl-module-corelist >=5.20190524 36 | - perl-module-load >=0.32 37 | - perl-module-load-conditional >=0.68 38 | - perl-module-metadata >=1.000036 39 | - perl-params-check >=0.38 40 | - perl-parent >=0.236 41 | - perl-path-class >=0.37 42 | - perl-perl-ostype >=1.010 43 | - perl-perlio-gzip >=0.20 44 | - perl-pod-escapes >=1.07 45 | - perl-pod-simple >=3.35 46 | - perl-pod-usage >=1.69 47 | - perl-posix >=1.38_03 48 | - perl-set-intervaltree >=0.12 49 | - perl-socket >=2.027 50 | - perl-symbol >=1.07 51 | - perl-test >=1.26 52 | - perl-test-harness >=3.42 53 | - perl-threaded >=5.26.0 54 | - perl-tie-hash >=1.05 55 | - perl-tie-hash-indexed >=0.05 56 | - perl-time-hires >=1.9760 57 | - perl-version >=0.9924 58 | - perl-xsloader >=0.24 59 | - perl-yaml >=1.29 60 | - pigz >=2.6 61 | 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #temps 2 | *~ 3 | \#* 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | #lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /scripts/Shells/UniqueSam_woPicard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | in=$1 4 | out=$2 5 | threads=$3 6 | specialmappers="${4:-}" 7 | 8 | if [[ "$1" == *.gz* ]] 9 | then 10 | samtools view -H <(zcat $in) | grep '@HD' | pigz -p $threads -f > $out 11 | samtools view -H <(zcat $in) | grep '@SQ' | sort -t$'\t' -k1,1 -k2,2V | pigz -p $threads -f >> $out 12 | samtools view -H <(zcat $in) | grep '@RG' | pigz -p $threads -f >> $out 13 | samtools view -H <(zcat $in) | grep '@PG' | pigz -p $threads -f >> $out 14 | else 15 | samtools view -H 
<(cat $in)|grep '@HD' | pigz -p $threads -f > $out 16 | samtools view -H <(cat $in)|grep '@SQ' | sort -t$'\t' -k1,1 -k2,2V |pigz -p $threads -f >> $out 17 | samtools view -H <(cat $in)|grep '@RG' | pigz -p $threads -f >> $out 18 | samtools view -H <(cat $in)|grep '@PG' | pigz -p $threads -f >> $out 19 | fi 20 | 21 | if [[ "$1" == *bwa* ]] || [[ "$specialmappers" == *bwa* ]] 22 | then 23 | if [[ "$1" == *.gz* ]] 24 | then 25 | zcat $in | grep -v "^@"| grep -v -e $'\t''XA:Z:' -e $'\t''SA:Z:' | pigz -p $threads -f >> $out 26 | else 27 | cat $in | grep -v "^@"| grep -v -e $'\t''XA:Z:' -e $'\t''SA:Z:' | pigz -p $threads -f >> $out 28 | fi 29 | elif [[ "$1" == *minimap* ]] || [[ "$specialmappers" == *minimap* ]] 30 | then 31 | if [[ "$1" == *.gz* ]] 32 | then 33 | zcat $in | grep -v "^@"| perl -wlane 'print if $F[4] >=60'| pigz -p $threads -f >> $out 34 | else 35 | cat $in | grep -v "^@"| perl -wlane 'print if $F[4] >=60' | pigz -p $threads -f >> $out 36 | fi 37 | else 38 | if [[ "$1" == *.gz* ]] 39 | then 40 | zcat $in | grep -v "^@" | grep -w -P "NH:i:1|tp:A:P" | pigz -p $threads -f >> $out 41 | else 42 | cat $in | grep -v "^@" | grep -w -P "NH:i:1|tp:A:P" | pigz -p $threads -f >> $out 43 | fi 44 | fi 45 | -------------------------------------------------------------------------------- /workflows/header.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | // includes 4 | // include {} from "../lib/Collection.groovy" 5 | 6 | // NOTE 7 | // ALWAYS COMMENT LINES WITH '//', DO NOT USE MULTI LINE COMMENTS AS THE PARSER WILL NOT IGNORE MIDDLE LINES AND THIS WILL CAUSE CHAOS 8 | 9 | //Version Check 10 | nextflowVersion = '>=20.01.0.5264' 11 | nextflow.enable.dsl=2 12 | 13 | //define unset Params 14 | def get_always(parameter){ 15 | if (!params.containsKey(parameter)){ 16 | params.put(parameter, null) 17 | } 18 | return params[parameter] 19 | } 20 | 21 | //Params from CL 22 | REFERENCE = "${workflow.workDir}/../"+get_always('REFERENCE') 23 | REFDIR = "${workflow.workDir}/../"+get_always('REFDIR') 24 | BINS = get_always('BINS') 25 | THREADS = get_always('MAXTHREAD') 26 | PAIRED = get_always('PAIRED') ?: null 27 | RUNDEDUP = get_always('RUNDEDUP') ?: null 28 | PREDEDUP = get_always('PREDEDUP') ?: null 29 | STRANDED = get_always('STRANDED') ?: null 30 | IP = get_always('IP') ?: null 31 | CONDITION = get_always('CONDITION') ?: null 32 | COMBO = get_always('COMBO') ?: '' 33 | SCOMBO = get_always('SCOMBO') ?: '' 34 | SAMPLES = get_always('SAMPLES').split(',') ?: null 35 | LONGSAMPLES = get_always('LONGSAMPLES').split(',') ?: null 36 | SHORTSAMPLES = get_always('SHORTSAMPLES').split(',') ?: null 37 | SETS = get_always('SETS') ?: null 38 | //dummy 39 | dummy = Channel.fromPath("${workflow.workDir}/../LOGS/MONSDA.log") 40 | 41 | //SAMPLE CHANNELS 42 | if (PAIRED == 'paired' || PAIRED == 'singlecell'){ 43 | RSAMPLES = SAMPLES.collect{ 44 | element -> return "${workflow.workDir}/../FASTQ/"+element+"_{R2,R1}.*fastq.gz" 45 | } 46 | }else{ 47 | RSAMPLES=SAMPLES.collect{ 48 | element -> return "${workflow.workDir}/../FASTQ/"+element+".*fastq.gz" 49 | } 50 | } 51 | 52 | samples_ch = Channel.fromPath(RSAMPLES) -------------------------------------------------------------------------------- /scripts/Shells/MergeExpression_Cufflinks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/env bash 2 | COND=$1 3 | SOURCE=$2 4 | PATTERN=$3 5 | SAMPLES=$4 6 | 7 | #cut -d$'\t' -f1 
L12270_09_WT_d4_1_2_IN_R1_mapped_sorted.transcript_counts > tmp_counts 8 | 9 | for i in *$COND\_*$SOURCE\_*$PATTERN\/transcripts.gtf;do 10 | echo $i 11 | cut -d$'\t' -f9 $i|perl -lan -F'; ' -e 'BEGIN{$trans;%exp=()};for(0..$#F){($line=$F[$_])=~s/\"//g;$line=~s/^\s//g;@tmp=split(/\s/,$line);if($tmp[0] eq "transcript_id"){$trans=$tmp[1]} if($tmp[0] eq "FPKM"){$exp{$trans}=$tmp[1]}}END{foreach $key(keys %exp){print $key,"\t",$exp{$key}}}' - >> COUNTS_$COND\_$SOURCE\_tmp 12 | done 13 | 14 | env sa=$SAMPLES perl -lan -F'\t' -e 'BEGIN{%exp=()}; $exp{$F[0]}+=$F[1]; END{foreach $key(keys %exp){print $key,"\t",$exp{$key}/$ENV{sa}}}' COUNTS_$COND\_$SOURCE\_tmp |sort -k1,1d > COUNTS_$COND\_$SOURCE.transcript.fpkm 15 | 16 | #rm -f COUNTS_$COND\_$SOURCE\_tmp 17 | 18 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sorted_unique.counts) > Compare_D_L_uni 19 | #join -1 1 -2 1 <(sort -k1,1d Gene_sum_bfx656.hg38.e81) <(sort -k1,1d COUNTS/Featurecounter/bfx656/Gene_sum_mapped_sortedall) > Compare_D_L 20 | 21 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d4 IP R1 3 22 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d0 IP R1 3 23 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d4 IP R1 3 24 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d0 IP R1 3 25 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d0 IN R1 3 26 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh WT_d4 IN R1 3 27 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d4 IN R1 3 28 | #bash ../../../Workflows/scripts/Shells/MergeExpression_Cufflinks.sh KO_d0 IN R1 3 29 | -------------------------------------------------------------------------------- /workflows/ciri2.smk: -------------------------------------------------------------------------------- 1 | CBIN, CENV = env_bin_from_config(config, 'CIRCS') 2 | 3 | if not 'bwa' in combo or not 'bwa' in scombo: 4 | log.warning('Ciri2 needs BWA input, can only be used with BWA in mapping step') 5 | 6 | if not rundedup: 7 | rule themall: 8 | input: expand("CIRCS/{combo}/{file}_circs", combo=combo, file=samplecond(SAMPLES, config)) 9 | else: 10 | rule themall: 11 | input: expand("CIRCS/{combo}/{file}_{type}", combo=combo, file=samplecond(SAMPLES, config), type=['sorted', 'sorted_dedup']) 12 | 13 | rule FindCircs: 14 | input: sam = expand("MAPPED/{scombo}/{{file}}_mapped_sorted.sam.gz", scombo=scombo), 15 | ref = REFERENCE, 16 | anno = ANNOTATION 17 | output: circs = "CIRCS/{combo}/{file}_circs", 18 | tmp = temp(directory("CIRCS/{combo}/{file}_tmp")), 19 | ts = temp("CIRCS/{combo}/{file}_tmp.sam"), 20 | ta = temp("CIRCS/{combo}/{file}_tmp.gtf"), 21 | tf = temp("CIRCS/{combo}/{file}_tmp.fa") 22 | log: "LOGS/CIRCS/{combo}/{file}_ciri2.log" 23 | conda: ""+CENV+".yaml" 24 | threads: MAXTHREAD 25 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, "CIRCS", CENV)['OPTIONS'].get('CIRC', ""), 26 | circ = CBIN 27 | shell: "set +o pipefail; export LC_ALL=C; if [[ -n \"$(zcat {input.sam} | head -c 1 | tr \'\\0\\n\' __)\" ]] ;then mkdir -p {output.tmp} && zcat {input.sam}|samtools sort -n -@ {threads} -u -O sam -T {output.tmp} > {output.ts} && zcat {input.anno} > {output.ta} && zcat {input.ref} > {output.tf} && perl {params.circ} -I {output.ts} -O {output.circs} -F {output.tf} -T {threads} -A {output.ta} -G {log} {params.cpara} &>> {log}; else gzip < 
/dev/null > {output.circs}; echo \"File {input.sam} empty\" >> {log}; fi; touch CIRIerror.log && cat CIRIerror.log >> {log} && echo '' > CIRIerror.log && touch {output.circs} && mkdir -p {output.tmp}" 28 | -------------------------------------------------------------------------------- /workflows/umitools_dedup.nf: -------------------------------------------------------------------------------- 1 | DEDUPENV=get_always('DEDUPENV') 2 | DEDUPBIN=get_always('DEDUPBIN') 3 | 4 | DEDUPPARAMS = get_always('umitools_params_DEDUP') ?: '' 5 | 6 | process dedup_bam{ 7 | conda "$DEDUPENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.endsWith("_dedup.bam")) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 15 | else if (filename.indexOf("_dedup.bam.bai") > 0) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 16 | else if (filename.indexOf("dedup.log") > 0) "LOGS/${COMBO}/${CONDITION}/DEDUP/${file(filename).getName()}" 17 | else null 18 | } 19 | 20 | input: 21 | path todedup 22 | path bami 23 | 24 | output: 25 | path "*_dedup.bam", emit: bam 26 | path "*_dedup.bam.bai", emit: bai 27 | path "*_dedup.log", emit: logs 28 | 29 | script: 30 | bams = todedup[0] 31 | bais = todedup[1] 32 | outf = bams.getSimpleName()+"_dedup.bam" 33 | outl = bams.getSimpleName()+"_dedup.log" 34 | if (PAIRED == 'paired'){ 35 | """ 36 | mkdir tmp && $DEDUPBIN dedup $DEDUPPARAMS --temp-dir tmp --log=$outl --paired --stdin=$bams --stdout=$outf && samtools index $outf >> $outl 37 | """ 38 | } 39 | else{ 40 | """ 41 | mkdir tmp && $DEDUPBIN dedup $DEDUPPARAMS --temp-dir tmp --log=$outl --stdin=$bams --stdout=$outf && samtools index $outf >> $outl 42 | """ 43 | } 44 | } 45 | 46 | workflow DEDUPBAM{ 47 | take: 48 | map 49 | mapi 50 | mapu 51 | mapui 52 | 53 | main: 54 | dedup_bam(map.concat(mapu), mapi.concat(mapui)) 55 | 56 | emit: 57 | dedup = dedup_bam.out.bam 58 | dedupbai = dedup_bam.out.bai 59 | deduplog = dedup_bam.out.logs 60 | } 61 | 62 | 63 | -------------------------------------------------------------------------------- /workflows/simulatetrim.nf: -------------------------------------------------------------------------------- 1 | T1SAMPLES = null 2 | T2SAMPLES = null 3 | 4 | process trim{ 5 | //conda "$TOOLENV"+".yaml" 6 | cpus THREADS 7 | cache 'lenient' 8 | //validExitStatus 0,1 9 | 10 | publishDir "${workflow.workDir}/../" , mode: 'copy', 11 | saveAs: {filename -> 12 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.fastq.gz" 13 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/Trimming_report.txt" 14 | else null 15 | } 16 | 17 | input: 18 | path reads 19 | 20 | output: 21 | path "*trimmed.fastq.gz" , emit: trim 22 | path "Trimming_report.txt", emit: rep 23 | 24 | script: 25 | if (PAIRED == 'paired' || PAIRED == 'singlecell'){ 26 | r1 = reads[0] 27 | r2 = reads[1] 28 | a="Trimming_report.txt" 29 | b=file(r1).getName().replace(".fastq.gz", "_trimmed.fastq.gz") 30 | c=file(r2).getName().replace(".fastq.gz", "_trimmed.fastq.gz") 31 | """ 32 | ln -sf $r1 $b ; ln -sf $r2 $c; echo "simulated $r1 $r2 trimming" > $a 33 | """ 34 | }else{ 35 | a="Trimming_report.txt" 36 | b=file(reads).getName().replace(".fastq.gz", "_trimmed.fastq.gz") 37 | """ 38 | ln -sf $reads $b ; echo "simulated $reads trimming" > $a 39 | """ 40 | } 41 | } 42 | 43 | workflow TRIMMING{ 44 | take: 45 | 
collection 46 | 47 | main: 48 | 49 | if ( PREDEDUP == 'enabled' ){ 50 | trim(collection) 51 | } else if ( collection.toList().contains('MONSDA.log') || collection.isEmpty()){ 52 | if (PAIRED == 'paired' || PAIRED == 'singlecell'){ 53 | trim(samples_ch.collate(2)) 54 | } 55 | else{ 56 | trim(samples_ch.collate(1)) 57 | } 58 | } else{ 59 | trim(collection) 60 | } 61 | 62 | emit: 63 | trimmed = trim.out.trim 64 | report = trim.out.rep 65 | } -------------------------------------------------------------------------------- /workflows/guppy.nf: -------------------------------------------------------------------------------- 1 | CALLERENV = get_always('BASECALLENV') 2 | CALLERBIN = get_always('BASECALLBIN') 3 | 4 | CALLERPARAMS = get_always('guppy_params_BASECALL') ?: '' 5 | MODELPARAMS = get_always('guppy_params_MODEL') ?: '' 6 | 7 | //CALLERS PROCESSES 8 | 9 | process guppy{ 10 | conda "$CALLERENV"+".yaml" 11 | cpus THREADS 12 | cache 'lenient' 13 | //validExitStatus 0,1 14 | 15 | publishDir "${workflow.workDir}/../" , mode: 'link', 16 | saveAs: {filename -> 17 | if (filename.indexOf(".fastq.gz") > 0) "FASTQ/${CONDITION}/${file(filename).getName()}" 18 | else if (filename.indexOf("_summary.txt") > 0) "FASTQ/${CONDITION}/${file(filename).getName()}" 19 | else if (filename.indexOf("_telemetry.js") > 0) "FASTQ/${CONDITION}/${file(filename).getName()}" 20 | else if (filename.indexOf(".log") > 0) "LOGS/BASECALL/${CONDITION}/${file(filename).getName()}" 21 | } 22 | 23 | input: 24 | path f5 25 | 26 | output: 27 | path "*.fastq.gz", emit: fastq 28 | path "*_telemetry.js", emit: telemetry 29 | path "*_summary.txt", emit: summary 30 | path "*.log", emit: log 31 | 32 | script: 33 | fn = file(f5).getSimpleName() 34 | oc = fn+".fastq.gz" 35 | ol = fn+".log" 36 | sortmem = '30%' 37 | 38 | """ 39 | mkdir -p TMP; echo \"${f5}\" > f5list && $CALLERBIN $CALLERPARAMS -c $MODELPARAMS --compress_fastq -i . --input_file_list f5list -s TMP 2> $ol && cat TMP/pass/fastq_runid_*.fastq.gz > $oc && cat TMP/*.log >> $ol && mv -f TMP/sequencing_summary.txt . && mv -f TMP/sequencing_telemetry.js .
&& rm -rf TMP 40 | """ 41 | } 42 | 43 | workflow BASECALL{ 44 | take: collection 45 | 46 | main: 47 | 48 | F5SAMPLES = SAMPLES.collect{ 49 | element -> return "${workflow.workDir}/../RAW/"+element+"*.fast5" 50 | } 51 | 52 | f5samples_ch = Channel.fromPath(F5SAMPLES.sort()) 53 | 54 | guppy(f5samples_ch.collate(1)) 55 | 56 | emit: 57 | fastq = guppy.out.fastq 58 | logs = guppy.out.log 59 | } -------------------------------------------------------------------------------- /workflows/picard_dedup.nf: -------------------------------------------------------------------------------- 1 | DEDUPENV=get_always('DEDUPENV') 2 | DEDUPBIN=get_always('DEDUPBIN') 3 | DEDUPPARAMS = get_always('picard_params_DEDUP') ?: '' 4 | JAVAPARAMS = get_always('picard_params_JAVA') ?: '' 5 | 6 | process dedup_bam{ 7 | conda "$DEDUPENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.endsWith("_dedup.bam")) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 15 | else if (filename.indexOf("_dedup.bam.bai") > 0) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 16 | else if (filename.indexOf("dedup.log") > 0) "LOGS/${COMBO}/${CONDITION}/DEDUP/${file(filename).getName()}" 17 | else if (filename.indexOf("metrix.txt") > 0) "MAPPED/${COMBO}/${CONDITION}/${file(filename).getName()}" 18 | else null 19 | } 20 | 21 | input: 22 | path todedup 23 | path bami 24 | 25 | output: 26 | path "*_dedup.bam", emit: bam 27 | path "*_dedup.bam.bai", emit: bai 28 | path "*_dedup.log", emit: logs 29 | path "*_dedup_metrix.txt", emit: metrics 30 | 31 | script: 32 | bams = todedup[0] 33 | bais = todedup[1] 34 | outf = bams.getSimpleName()+"_dedup.bam" 35 | outl = bams.getSimpleName()+"_dedup.log" 36 | outm = bams.getSimpleName()+"_dedup_metrix.txt" 37 | """ 38 | mkdir -p TMP && $DEDUPBIN $JAVAPARAMS MarkDuplicates --REMOVE_DUPLICATES true --ASSUME_SORT_ORDER coordinate --TMP_DIR TMP --INPUT $bams --OUTPUT $outf --METRICS_FILE $outm $DEDUPPARAMS &> $outl && samtools index $outf &>> $outl 39 | """ 40 | } 41 | 42 | workflow DEDUPBAM{ 43 | take: 44 | map 45 | mapi 46 | mapu 47 | mapui 48 | 49 | main: 50 | //dedup_bam(collection) 51 | dedup_bam(map.concat(mapu), mapi.concat(mapui)) 52 | 53 | emit: 54 | dedup = dedup_bam.out.bam 55 | dedupbai = dedup_bam.out.bai 56 | deduplog = dedup_bam.out.logs 57 | } 58 | 59 | 60 | -------------------------------------------------------------------------------- /tests/test_Utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from MONSDA.Utils import NestedDefaultDict, rmempty, comment_remover, dict_inst, get_from_dict, yield_from_dict 3 | 4 | class TestUtils(unittest.TestCase): 5 | 6 | def test_NestedDefaultDict(self): 7 | # Test initialization and basic functionality 8 | nested_dict = NestedDefaultDict(lambda: NestedDefaultDict(int)) 9 | nested_dict['a']['b'] = 1 10 | self.assertEqual(nested_dict['a']['b'], 1) 11 | # Test default factory 12 | self.assertEqual(nested_dict['c']['d'], 0) 13 | 14 | def test_rmempty(self): 15 | # Assuming rmempty function removes empty directories 16 | # This test might need to create temporary directories and files to fully test rmempty functionality 17 | pass 18 | 19 | def test_comment_remover(self): 20 | # Assuming comment_remover function removes comments from a list of strings 21 | input_text = ["code line 1", "# this is a comment", "code line 2 # inline comment"] 22 | 
expected_output = ["code line 1", "code line 2 "] 23 | self.assertEqual(comment_remover(input_text), expected_output) 24 | 25 | def test_dict_inst(self): 26 | # Assuming dict_inst function checks if an instance is a dictionary 27 | self.assertTrue(dict_inst({'key': 'value'})) 28 | self.assertFalse(dict_inst(['not', 'a', 'dict'])) 29 | 30 | def test_get_from_dict(self): 31 | # Assuming get_from_dict function retrieves a value from a nested dictionary using a list of keys 32 | data_dict = {'a': {'b': {'c': 'd'}}} 33 | map_list = ['a', 'b', 'c'] 34 | self.assertEqual(get_from_dict(data_dict, map_list), 'd') 35 | 36 | def test_yield_from_dict(self): 37 | # Assuming yield_from_dict function yields items from a dictionary that match a given key 38 | data_dict = {'a': 1, 'b': 2, 'c': {'a': 3, 'b': 4}} 39 | key = 'a' 40 | expected_output = [1, 3] 41 | self.assertEqual(list(yield_from_dict(key, data_dict)), expected_output) 42 | 43 | if __name__ == '__main__': 44 | unittest.main() -------------------------------------------------------------------------------- /workflows/wip/pycoqc.smk: -------------------------------------------------------------------------------- 1 | rule pycoqc_raw: 2 | input: "FASTQ/{rawfile}.fastq.gz" 3 | output: report("QC/{rawfile}_pycoqc.zip", category="QC") 4 | log: "LOGS/{rawfile}/pycoqc_raw.log" 5 | conda: "../envs/qc.yaml" 6 | threads: 20 7 | params: dir=lambda w: expand("QC/{source}",source=source_from_sample(w.file)) 8 | shell: "for i in {input}; do OUT=$(dirname {output});pycoqc --quiet -o $OUT -t {threads} --noextract -f fastq {input} 2> {log};done" 9 | 10 | rule pycoqc_trimmed: 11 | input: "TRIMMED_FASTQ/{rawfile}_trimmed.fastq.gz", 12 | "QC/{rawfile}_pycoqc.zip" 13 | output: report("QC/{rawfile}_trimmed_pycoqc.zip", category="QC") 14 | log: "LOGS/{rawfile}/pycoqc_trimmed.log" 15 | conda: "../envs/qc.yaml" 16 | threads: 20 17 | params: dir=lambda w: expand("QC/{source}",source=source_from_sample(w.file)) 18 | shell: "for i in {input[0]}; do OUT=$(dirname {output});pycoqc --quiet -o $OUT -t {threads} --noextract -f fastq {input[0]} 2> {log};done" 19 | 20 | rule pyqc_mapped: 21 | input: "SORTED_MAPPED/{file}_mapped_sorted.sam.gz" 22 | output: report("QC/{file}_mapped_sorted_pycoqc.zip", category="QC") 23 | log: "LOGS/{file}/pycoqc_mapped.log" 24 | params: dir=lambda w: expand("QC/{source}",source=source_from_sample(w.file)) 25 | conda: "../envs/qc.yaml" 26 | threads: 20 27 | shell: "for i in {input}; do OUT=$(dirname {output});pycoqc --quiet -o $OUT -t {threads} --noextract -f sam_mapped {input} 2> {log};done" 28 | 29 | rule pyqc_uniquemapped: 30 | input: "UNIQUE_MAPPED/{file}_mapped_sorted_unique.bam", 31 | "UNIQUE_MAPPED/{file}_mapped_sorted_unique.bam.bai" 32 | output: report("QC/{file}_mapped_sorted_unique_pycoqc.zip", category="QC") 33 | log: "LOGS/{file}/pycoqc_uniquemapped.log" 34 | conda: "../envs/qc.yaml" 35 | threads: 20 36 | params: dir=lambda w: expand("QC/{source}",source=source_from_sample(w.file)) 37 | # params: dir=expand("QC/{source}",source=SOURCE) 38 | shell: "for i in {input[0]}; do OUT=$(dirname {output});pycoqc --quiet -o $OUT -t {threads} --noextract -f bam {input[0]} 2> {log};done" 39 | -------------------------------------------------------------------------------- /profile_snakemake/slurm-submit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Snakemake SLURM submit script. 
4 | """ 5 | import warnings # use warnings.warn() rather than print() to output info in this script 6 | 7 | from snakemake.utils import read_job_properties 8 | 9 | import slurm_utils 10 | 11 | # cookiecutter arguments 12 | SBATCH_DEFAULTS = """ """ 13 | CLUSTER_CONFIG = "cluster_config.yaml" 14 | ADVANCED_ARGUMENT_CONVERSION = {"yes": True, "no": False}[ 15 | "no" 16 | ] 17 | 18 | RESOURCE_MAPPING = { 19 | "time": ("time", "runtime", "walltime"), 20 | "mem": ("mem", "mem_mb", "ram", "memory"), 21 | "mem-per-cpu": ("mem-per-cpu", "mem_per_cpu", "mem_per_thread"), 22 | "nodes": ("nodes", "nnodes"), 23 | } 24 | 25 | # parse job 26 | jobscript = slurm_utils.parse_jobscript() 27 | job_properties = read_job_properties(jobscript) 28 | 29 | sbatch_options = {} 30 | cluster_config = slurm_utils.load_cluster_config(CLUSTER_CONFIG) 31 | 32 | # 1) sbatch default arguments 33 | sbatch_options.update(slurm_utils.parse_sbatch_defaults(SBATCH_DEFAULTS)) 34 | 35 | # 2) cluster_config defaults 36 | sbatch_options.update(cluster_config["__default__"]) 37 | 38 | # 3) Convert resources (no unit conversion!) and threads 39 | sbatch_options.update( 40 | slurm_utils.convert_job_properties(job_properties, RESOURCE_MAPPING) 41 | ) 42 | 43 | # 4) cluster_config for particular rule 44 | sbatch_options.update(cluster_config.get(job_properties.get("rule"), {})) 45 | 46 | # 5) cluster_config options 47 | sbatch_options.update(job_properties.get("cluster", {})) 48 | 49 | # 6) Advanced conversion of parameters 50 | if ADVANCED_ARGUMENT_CONVERSION: 51 | sbatch_options = slurm_utils.advanced_argument_conversion(sbatch_options) 52 | 53 | # 7) Format pattern in snakemake style 54 | sbatch_options = slurm_utils.format_values(sbatch_options, job_properties) 55 | 56 | # ensure sbatch output dirs exist 57 | for o in ("output", "error"): 58 | slurm_utils.ensure_dirs_exist(sbatch_options[o]) if o in sbatch_options else None 59 | 60 | # submit job and echo id back to Snakemake (must be the only stdout) 61 | print(slurm_utils.submit_job(jobscript, **sbatch_options)) 62 | -------------------------------------------------------------------------------- /profile_snakemake/slurm-status.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import re 3 | import subprocess as sp 4 | import shlex 5 | import sys 6 | import time 7 | import logging 8 | 9 | logger = logging.getLogger("__name__") 10 | 11 | STATUS_ATTEMPTS = 20 12 | 13 | jobid = sys.argv[1] 14 | 15 | # noqa: E999,E225 16 | cluster = "--cluster=cluster" 17 | # noqa: E225 18 | 19 | for i in range(STATUS_ATTEMPTS): 20 | try: 21 | sacct_res = sp.check_output(shlex.split(f"sacct {cluster} -P -b -j {jobid} -n")) 22 | res = { 23 | x.split("|")[0]: x.split("|")[1] 24 | for x in sacct_res.decode().strip().split("\n") 25 | } 26 | break 27 | except sp.CalledProcessError as e: 28 | logger.error("sacct process error") 29 | logger.error(e) 30 | except IndexError as e: 31 | pass 32 | # Try getting job with scontrol instead in case sacct is misconfigured 33 | try: 34 | sctrl_res = sp.check_output( 35 | shlex.split(f"scontrol {cluster} -o show job {jobid}") 36 | ) 37 | m = re.search(r"JobState=(\w+)", sctrl_res.decode()) 38 | res = {jobid: m.group(1)} 39 | break 40 | except sp.CalledProcessError as e: 41 | logger.error("scontrol process error") 42 | logger.error(e) 43 | if i >= STATUS_ATTEMPTS - 1: 44 | print("failed") 45 | exit(0) 46 | else: 47 | time.sleep(1) 48 | 49 | status = res[jobid] 50 | 51 | if status == "BOOT_FAIL": 52 | 
print("failed") 53 | elif status == "OUT_OF_MEMORY": 54 | print("failed") 55 | elif status.startswith("CANCELLED"): 56 | print("failed") 57 | elif status == "COMPLETED": 58 | print("success") 59 | elif status == "DEADLINE": 60 | print("failed") 61 | elif status == "FAILED": 62 | print("failed") 63 | elif status == "NODE_FAIL": 64 | print("failed") 65 | elif status == "PREEMPTED": 66 | print("failed") 67 | elif status == "TIMEOUT": 68 | print("failed") 69 | # Unclear whether SUSPENDED should be treated as running or failed 70 | elif status == "SUSPENDED": 71 | print("failed") 72 | else: 73 | print("running") 74 | -------------------------------------------------------------------------------- /workflows/trimgalore.nf: -------------------------------------------------------------------------------- 1 | TRIMENV=get_always('TRIMMINGENV') 2 | TRIMBIN=get_always('TRIMMINGBIN') 3 | 4 | TRIMPARAMS = get_always('trimgalore_params_TRIM') ?: '' 5 | //int cores = min(THREADS,4) 6 | //TRIMMING PROCESSES 7 | 8 | process trim{ 9 | conda "$TRIMENV"+".yaml" 10 | cpus 4//cores 11 | //validExitStatus 0,1 12 | 13 | publishDir "${workflow.workDir}/../" , mode: 'link', 14 | saveAs: {filename -> 15 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/_val_\d{1}|_trimmed|_dedup/,"")}_trimmed.fastq.gz" 16 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/.fastq.gz/,"")}_trimming_report.txt" 17 | else if (filename.indexOf(".log") >0) "LOGS/${COMBO}/${CONDITION}/TRIMMING/${file(filename).getSimpleName()}.log" 18 | else null 19 | } 20 | 21 | input: 22 | path reads 23 | 24 | output: 25 | path "*_trimmed.fastq.gz", emit: trim 26 | path "*trimming_report.txt", emit: rep 27 | 28 | script: 29 | if (PAIRED == 'paired'){ 30 | r1 = reads[0] 31 | r2 = reads[1] 32 | """ 33 | $TRIMBIN --cores ${task.cpus} --paired --gzip $TRIMPARAMS $r1 $r2 &> trim.log && rename 's/_dedup//g' *.fq.gz && rename 's/_R([1|2])_val_([1|2]).fq.gz/_R\\1_trimmed.fastq.gz/g' *.fq.gz && rename 's/.fastq.gz_trimming/_trimming/g' *.txt 34 | """ 35 | } 36 | else{ 37 | """ 38 | $TRIMBIN --cores ${task.cpus} --gzip $TRIMPARAMS $reads &> trim.log && rename 's/_dedup//g' *.fq.gz && rename 's/.fq.gz/.fastq.gz/g' *.fq.gz && rename 's/.fastq.gz_trimming/_trimming/g' *.txt 39 | """ 40 | } 41 | } 42 | 43 | workflow TRIMMING{ 44 | take: 45 | collection 46 | 47 | main: 48 | //check = collection.toList() 49 | if ( PREDEDUP == 'enabled' ){ // && !check.contains('MONSDA.log')){ 50 | trim(collection) 51 | }else { 52 | if (PAIRED == 'paired'){ 53 | trim(samples_ch.collate(2)) 54 | } else{ 55 | trim(samples_ch.collate(1)) 56 | } 57 | } 58 | 59 | emit: 60 | trimmed = trim.out.trim 61 | report = trim.out.rep 62 | } 63 | -------------------------------------------------------------------------------- /workflows/ciri2.nf: -------------------------------------------------------------------------------- 1 | CIRCENV = get_always('CIRCSENV') 2 | CIRCBIN = get_always('CIRCSBIN') 3 | CIRCREF = get_always('CIRCSREF') 4 | CIRCREFDIR = "${workflow.workDir}/../"+get_always('CIRCSREFDIR') 5 | CIRCANNO = get_always('CIRCSANNO') 6 | 7 | CIRCPARAMS = get_always('ciri2_params_CIRC') ?: '' 8 | 9 | //CIRCS PROCESSES 10 | 11 | process ciri2{ 12 | conda "$CIRCENV"+".yaml" 13 | cpus THREADS 14 | cache 'lenient' 15 | //validExitStatus 0,1 16 | 17 | publishDir "${workflow.workDir}/../" , mode: 'link', 18 | saveAs: {filename -> 19 | if 
(filename.indexOf("_circs") > 0) "CIRCS/${SCOMBO}/${CONDITION}/${file(filename).getSimpleName()}" 20 | else if (filename.indexOf(".log") > 0) "LOGS/${SCOMBO}/${CONDITION}/${file(filename).getSimpleName()}" 21 | } 22 | 23 | input: 24 | path fls 25 | 26 | output: 27 | path "*_circs", emit: circs 28 | path "log", emit: log 29 | 30 | script: 31 | ref = fls[0] 32 | anno = fls[1] 33 | reads = fls[2] 34 | fn = file(reads).getSimpleName() 35 | oc = fn+"_circs" 36 | ol = fn+".log" 37 | sortmem = '30%' 38 | 39 | """ 40 | set +o pipefail; export LC_ALL=C; if [[ -n \"\$(zcat ${reads} | head -c 1 | tr \'\\0\\n\' __)\" ]] ;then mkdir -p TMP && zcat ${reads}|samtools sort -n -@ ${task.cpus} -u -O sam -T TMP > ${fn}_tmp.sam && zcat ${anno} > ${fn}_tmp.gtf && zcat ${ref} > ${fn}_tmp.fa && perl $CIRCBIN -I ${fn}_tmp.sam -O ${fn}_circs -F ${fn}_tmp.fa -T ${task.cpus} -A ${fn}_tmp.gtf -G log $CIRCPARAMS &>> log; else gzip < /dev/null > ${fn}_circs; echo \"File ${reads} empty\" >> log; fi; touch CIRIerror.log && cat CIRIerror.log >> {log} && echo '' > CIRIerror.log && touch ${fn}_circs 41 | """ 42 | } 43 | 44 | workflow CIRCS{ 45 | take: collection 46 | 47 | main: 48 | 49 | MAPPEDSAMPLES = LONGSAMPLES.collect{ 50 | element -> return "${workflow.workDir}/../MAPPED/${COMBO}/"+element+"*_mapped_sorted.sam.gz" 51 | } 52 | 53 | mapsamples_ch = Channel.fromPath(MAPPEDSAMPLES.sort()) 54 | annofile = Channel.fromPath(CIRCANNO) 55 | genomefile = Channel.fromPath(CIRCREF) 56 | 57 | ciri2(genomefile.combine(annofile.combine(mapsamples_ch.collate(1)))) 58 | 59 | emit: 60 | circs = ciri2.out.circs 61 | logs = ciri2.out.log 62 | } -------------------------------------------------------------------------------- /workflows/sra.nf: -------------------------------------------------------------------------------- 1 | FETCHENV=get_always('FETCHENV') 2 | FETCHBIN=get_always('FETCHBIN') 3 | 4 | FETCHPARAMS = get_always('sra_params_PREFETCH') ?: '' 5 | DOWNPARAMS = get_always('sra_params_DOWNLOAD') ?: '' 6 | 7 | 8 | //FETCH PROCESSES 9 | 10 | process prefetch_sra{ 11 | conda "$FETCHENV"+".yaml" 12 | cpus THREADS 13 | cache 'lenient' 14 | //validExitStatus 0,1 15 | 16 | publishDir "${workflow.workDir}/../" , mode: 'link', 17 | saveAs: {filename -> 18 | if (filename.indexOf(".log") >0) "LOGS/$CONDITION/FETCH/Prefetch_SRA.log" 19 | else null 20 | } 21 | 22 | input: 23 | val reads 24 | 25 | output: 26 | path "*.sra", emit: sra 27 | 28 | script: 29 | fn = reads+".sra" 30 | """ 31 | export NCBI_SETTINGS=\"$FETCHPARAMS\" 32 | prefetch $reads -o $fn &> prefetch.log 33 | """ 34 | } 35 | 36 | process download_sra{ 37 | conda "$FETCHENV"+".yaml" 38 | cpus THREADS 39 | cache 'lenient' 40 | //validExitStatus 0,1 41 | 42 | publishDir "${workflow.workDir}/../" , mode: 'link', 43 | saveAs: {filename -> 44 | if (filename.indexOf(".fastq.gz") > 0) "FASTQ/$CONDITION/${file(filename).getSimpleName()}.fastq.gz" 45 | else if (filename.indexOf(".log") >0) "LOGS/$CONDITION/FETCH/SRA.log" 46 | else null 47 | } 48 | 49 | input: 50 | path sras 51 | 52 | output: 53 | path "*fastq.gz", emit: fq 54 | 55 | script: 56 | if (PAIRED == 'paired'){ 57 | """ 58 | export NCBI_SETTINGS=\"$FETCHPARAMS\" 59 | fasterq-dump -e ${task.cpus} $DOWNPARAMS --split-files $sras &> sra.log ; rename 's/(.sra)*_([1|2])/_R\$2/' *.fastq; for i in *.fastq;do pigz -p ${task.cpus} \$i;done 60 | """ 61 | } 62 | else{ 63 | """ 64 | export NCBI_SETTINGS=\"$FETCHPARAMS\" 65 | fasterq-dump -e ${task.cpus} $DOWNPARAMS $sras &> sra.log ; rename 's/(.sra)*_([1|2])/_R\$2/' *.fastq ; for i in 
*.fastq;do pigz -p ${task.cpus} \$i;done 66 | """ 67 | } 68 | } 69 | 70 | workflow FETCH{ 71 | take: collection 72 | 73 | main: 74 | //SAMPLE CHANNELS 75 | samples_ch = Channel.of(SHORTSAMPLES) 76 | 77 | prefetch_sra(samples_ch) 78 | download_sra(prefetch_sra.out.sra) 79 | 80 | emit: 81 | fetched = download_sra.out.fq 82 | } 83 | -------------------------------------------------------------------------------- /workflows/manipulate_genome.smk: -------------------------------------------------------------------------------- 1 | rule UnzipGenome: 2 | input: ref = REFERENCE, 3 | output: fa = expand("{ref}.fa", ref=REFERENCE.replace('.fa.gz', '')), 4 | fai = expand("{ref}.fa.fai", ref=REFERENCE.replace('.fa.gz', '')), 5 | fas = expand("{ref}.chrom.sizes", ref=REFERENCE.replace('.fa.gz', '')) 6 | log: expand("LOGS/{combo}/indexfa.log", combo=combo) 7 | conda: "samtools.yaml" 8 | threads: 1 9 | params: bins = BINS 10 | shell: "set +o pipefail; zcat {input[0]} |perl -F\\\\040 -wane 'if($_ =~ /^>/){{chomp($F[0]);print \"\\n\".$F[0].\"\\n\"}} else{{($line=$_)=~s/\\r[\\n]*/\\n/gm; chomp($line=$_); print $line}}' |tail -n+2 > {output.fa} && {params.bins}/Preprocessing/indexfa.sh {output.fa} 2> {log} && cut -f1,2 {output.fai} > {output.fas}" 11 | #shell: "set +o pipefail; zcat {input[0]} |perl -F\\\\040 -wane 'if($_ =~ /^>/){{$F[0] = $F[0] =~ /^>chr/ ? $F[0] : \">chr\".substr($F[0],1);chomp($F[0]);print \"\\n\".$F[0].\"\\n\"}} else{{($line=$_)=~s/\\r[\\n]*/\\n/gm; chomp($line=$_); print $line}}' |tail -n+2 > {output.fa} && {params.bins}/Preprocessing/indexfa.sh {output.fa} 2> {log} && cut -f1,2 {output.fai} > {output.fas}" 12 | 13 | rule UnzipGenome_no_us: 14 | input: ref = REFERENCE, 15 | output: fa = expand("{ref}_us.fa", ref=REFERENCE.replace('.fa.gz', '')), 16 | fai = expand("{ref}_us.fa.fai", ref=REFERENCE.replace('.fa.gz', '')), 17 | fas = expand("{ref}_us.chrom.sizes", ref=REFERENCE.replace('.fa.gz', '')) 18 | log: expand("LOGS/{combo}/indexfa_us.log", combo=combo) 19 | conda: "samtools.yaml" 20 | threads: 1 21 | params: bins = BINS 22 | shell: "set +o pipefail; zcat {input[0]} |perl -F\\\\040 -wane 'if($_ =~ /^>/){{$F[0] = $F[0] =~ /^>chr/ ? $F[0] : \">chr\".substr($F[0],1))=~ s/\_/\./g;chomp($F[0]);print \"\\n\".$F[0].\"\\n\"}} else{{($line=$_)=~s/\\r[\\n]*/\\n/gm; chomp($line=$_); print $line}}' |tail -n+2 > {output.fa} && {params.bins}/Preprocessing/indexfa.sh {output.fa} 2> {log} && cut -f1,2 {output.fai} > {output.fas}" 23 | #shell: "set +o pipefail; zcat {input[0]} |perl -F\\\\040 -wane 'if($_ =~ /^>/){{$F[0] = $F[0] =~ /^>chr/ ? 
$F[0] : \">chr\".substr($F[0],1))=~ s/\_/\./g;chomp($F[0]);print \"\\n\".$F[0].\"\\n\"}} else{{($line=$_)=~s/\\r[\\n]*/\\n/gm; chomp($line=$_); print $line}}' |tail -n+2 > {output.fa} && {params.bins}/Preprocessing/indexfa.sh {output.fa} 2> {log} && cut -f1,2 {output.fai} > {output.fas}" 24 | -------------------------------------------------------------------------------- /workflows/fastqc_dedup.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | // RAW QC 6 | process qc_raw{ 7 | conda "$QCENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 15 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 16 | else null 17 | } 18 | 19 | input: 20 | path read 21 | 22 | output: 23 | path "*.{zip,html}", emit: fastqc_results 24 | 25 | script: 26 | """ 27 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 28 | """ 29 | } 30 | 31 | workflow QC_RAW{ 32 | take: collection 33 | 34 | main: 35 | //SAMPLE CHANNELS 36 | if (PAIRED == 'paired'){ 37 | SAMPLES = SAMPLES.collect{ 38 | element -> return "${workflow.workDir}/../FASTQ/"+element+"_{R2,R1}.*fastq.gz" 39 | } 40 | }else{ 41 | SAMPLES=SAMPLES.collect{ 42 | element -> return "${workflow.workDir}/../FASTQ/"+element+".*fastq.gz" 43 | } 44 | } 45 | 46 | samples_ch = Channel.fromPath(SAMPLES.sort()) 47 | 48 | qc_raw(samples_ch.collect()) 49 | 50 | emit: 51 | qc = qc_raw.out.fastqc_results 52 | } 53 | 54 | 55 | // DEDUP QC 56 | 57 | process qc_dedup{ 58 | conda "$QCENV"+".yaml" 59 | cpus THREADS 60 | cache 'lenient' 61 | //validExitStatus 0,1 62 | 63 | publishDir "${workflow.workDir}/../" , mode: 'link', 64 | saveAs: {filename -> 65 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 66 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 67 | else null 68 | } 69 | 70 | input: 71 | path read 72 | 73 | output: 74 | path "*.{zip,html}", emit: fastqc_results 75 | 76 | script: 77 | """ 78 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 79 | """ 80 | } 81 | 82 | workflow QC_DEDUP{ 83 | take: collection 84 | 85 | main: 86 | 87 | qc_dedup(collection.collect()) 88 | 89 | emit: 90 | qc = qc_dedup.out.fastqc_results 91 | } 92 | -------------------------------------------------------------------------------- /workflows/fastqc_trim.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | // RAW QC 6 | process qc_raw{ 7 | conda "$QCENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 15 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 16 | else null 17 | } 18 | 19 | input: 20 | path read 21 | 22 | output: 23 | path "*.{zip,html}", emit: fastqc_results 24 | 25 | script: 26 | """ 27 | fastqc --quiet -t 
${task.cpus} $QCPARAMS --noextract -f fastq $read 28 | """ 29 | } 30 | 31 | workflow QC_RAW{ 32 | take: collection 33 | 34 | main: 35 | //SAMPLE CHANNELS 36 | if (PAIRED == 'paired'){ 37 | SAMPLES = SAMPLES.collect{ 38 | element -> return "${workflow.workDir}/../FASTQ/"+element+"_{R2,R1}.*fastq.gz" 39 | } 40 | }else{ 41 | SAMPLES=SAMPLES.collect{ 42 | element -> return "${workflow.workDir}/../FASTQ/"+element+".*fastq.gz" 43 | } 44 | } 45 | 46 | samples_ch = Channel.fromPath(SAMPLES.sort()) 47 | 48 | qc_raw(samples_ch.collect()) 49 | 50 | emit: 51 | qc = qc_raw.out.fastqc_results 52 | } 53 | 54 | // TRIMMED QC 55 | 56 | process qc_trimmed{ 57 | conda "$QCENV"+".yaml" 58 | cpus THREADS 59 | cache 'lenient' 60 | //validExitStatus 0,1 61 | 62 | publishDir "${workflow.workDir}/../" , mode: 'link', 63 | saveAs: {filename -> 64 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 65 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 66 | else null 67 | } 68 | 69 | input: 70 | //val collect 71 | path read 72 | 73 | output: 74 | path "*.{zip,html}", emit: fastqc_results 75 | 76 | script: 77 | """ 78 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 79 | """ 80 | } 81 | 82 | workflow QC_TRIMMING{ 83 | take: collection 84 | 85 | main: 86 | 87 | qc_trimmed(collection.collect()) 88 | 89 | emit: 90 | qc = qc_trimmed.out.fastqc_results 91 | } 92 | -------------------------------------------------------------------------------- /scripts/Analysis/DAS/FeatureCounts2DIEGO.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use autodie; 5 | use Getopt::Long; 6 | use vars qw ($help $inlist $outfile $annotation); 7 | 8 | $outfile="junction_table_dexgo"; 9 | GetOptions ( 10 | "i=s" => \$inlist, 11 | "h" => \$help, 12 | "o=s" => \$outfile, 13 | "a=s" => \$annotation 14 | ); 15 | usage() if ($help || !$inlist); 16 | 17 | open(my $IN, "<", $inlist); 18 | my @files; 19 | my @names; 20 | my $count=0; 21 | 22 | while(<$IN>) { 23 | chomp(my $line = $_); 24 | next if ($line =~ /^#/); 25 | my @F = split("\t",$line); 26 | $names[$count]=$F[0] if $F[0]; 27 | $files[$count]=$F[1] if $F[1]; 28 | $count++; 29 | } 30 | 31 | die "$#names not $#files!" 
if ($#names != $#files or $#names <1 ) ; 32 | 33 | #my $filelist=join(" ",@files); 34 | my $headerlist=join("\t",@names); 35 | close($IN); 36 | 37 | #my %annotation; 38 | #if($annotation) { 39 | # open($IN, "<",$annotation); 40 | #} 41 | 42 | open(my $OUT, ">>", $outfile); 43 | print $OUT "junction\ttype\t$headerlist\tgeneID\tgeneName\n"; 44 | 45 | my $oldid=""; 46 | my $zaehl=0; 47 | $count=0; 48 | 49 | my @countfiles = @files;#split(" ",$filelist); 50 | 51 | while (my $f = shift(@countfiles)){ 52 | open($IN,'<',$f); 53 | while(<$IN>) { 54 | next if ($_ =~ /^#/); 55 | my @F=split; 56 | $F[0]=~/(\S+):\d+$/; 57 | my $geneid=$1; 58 | if($geneid ne $oldid) { 59 | $count++; 60 | } 61 | $oldid=$geneid; 62 | $zaehl+=100; 63 | my $z2=$zaehl+50; 64 | print $OUT "chrfoo:$zaehl"."-$z2\tN_w"; 65 | for (my $i=1; $i<=$#F; $i+=2) { 66 | print $OUT "\t$F[$i]"; 67 | } 68 | print $OUT "\t$geneid\tbar$count\n"; 69 | } 70 | close($OUT); 71 | } 72 | 73 | printf STDERR "You now should hav a file $outfile to play with\nThank you for travelling with us, Good bye!\n"; 74 | 75 | 76 | sub usage { 77 | print STDERR "\nHTseq2DIEGO.pl\n"; 78 | print STDERR "usage: HTseq2DIEGO.pl -i [OPTIONS]\n"; 79 | print STDERR "\n"; 80 | print STDERR "[INPUT]\n"; 81 | print STDERR " -i file containing input files and ids\n \t\tid [tab] path.to/file\n"; 82 | print STDERR " -o output file name (default:junction_table_dexdas )\n"; 83 | print STDERR " -h this (usefull) help message\n"; 84 | print STDERR "[VERSION]\n"; 85 | print STDERR " 06-25-2012\n"; 86 | print STDERR "[BUGS]\n"; 87 | print STDERR " Please report bugs to salzamt\@bioinf.uni-leipzig.de\n"; 88 | print STDERR "\n"; 89 | exit(-1); 90 | } 91 | -------------------------------------------------------------------------------- /workflows/cutadapt.nf: -------------------------------------------------------------------------------- 1 | TRIMENV=get_always('TRIMMINGENV') 2 | TRIMBIN=get_always('TRIMMINGBIN') 3 | 4 | TRIMPARAMS = get_always('cutadapt_params_TRIM') ?: '' 5 | //int cores = min(THREADS,4) 6 | //TRIMMING PROCESSES 7 | 8 | process trim{ 9 | conda "$TRIMENV"+".yaml" 10 | cpus 4//cores 11 | //validExitStatus 0,1 12 | 13 | publishDir "${workflow.workDir}/../" , mode: 'link', 14 | saveAs: {filename -> 15 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/_val_\d{1}|_trimmed|_dedup/,"")}_trimmed.fastq.gz" 16 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}" 17 | else if (filename.indexOf(".log") >0) "LOGS/${COMBO}/${CONDITION}/TRIMMING/${file(filename).getSimpleName()}.log" 18 | else null 19 | } 20 | 21 | input: 22 | path reads 23 | 24 | output: 25 | path "*_trimmed.fastq.gz", emit: trim 26 | path "*trimming_report.txt", emit: rep 27 | 28 | script: 29 | if (PAIRED == 'paired'){ 30 | r1 = reads[0] 31 | r2 = reads[1] 32 | o = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 33 | p = file(r2).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 34 | r = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 35 | """ 36 | $TRIMBIN --cores ${task.cpus} $TRIMPARAMS -o $o -p $p $r1 $r2 &> $r 37 | """ 38 | } 39 | else{ 40 | o = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 41 | r = 
file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 42 | """ 43 | $TRIMBIN --cores ${task.cpus} $TRIMPARAMS -o $o $reads &> $r 44 | """ 45 | } 46 | } 47 | 48 | workflow TRIMMING{ 49 | take: 50 | collection 51 | 52 | main: 53 | //check = collection.toList() 54 | if ( PREDEDUP == 'enabled' ){ // && !check.contains('MONSDA.log')){ 55 | trim(collection) 56 | }else { 57 | if (PAIRED == 'paired'){ 58 | trim(samples_ch.collate(2)) 59 | } else{ 60 | trim(samples_ch.collate(1)) 61 | } 62 | } 63 | 64 | emit: 65 | trimmed = trim.out.trim 66 | report = trim.out.rep 67 | } 68 | -------------------------------------------------------------------------------- /MONSDA/lib/Collection.groovy: -------------------------------------------------------------------------------- 1 | //import groovy.json.JsonSlurper 2 | 3 | def nfcoreHeader() { 4 | // Log colors ANSI codes 5 | c_black = params.monochrome_logs ? '' : "\033[0;30m"; 6 | c_blue = params.monochrome_logs ? '' : "\033[0;34m"; 7 | c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; 8 | c_dim = params.monochrome_logs ? '' : "\033[2m"; 9 | c_green = params.monochrome_logs ? '' : "\033[0;32m"; 10 | c_purple = params.monochrome_logs ? '' : "\033[0;35m"; 11 | c_reset = params.monochrome_logs ? '' : "\033[0m"; 12 | c_white = params.monochrome_logs ? '' : "\033[0;37m"; 13 | c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; 14 | 15 | return """ -${c_dim}--------------------------------------------------${c_reset}- 16 | ${c_green},--.${c_black}/${c_green},-.${c_reset} 17 | ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} 18 | ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} 19 | ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} 20 | ${c_green}`._,._,\'${c_reset} 21 | ${c_purple} nf-core/mapping v${workflow.manifest.version}${c_reset} 22 | -${c_dim}--------------------------------------------------${c_reset}- 23 | """.stripIndent() 24 | } 25 | 26 | def checkHostname() { 27 | def c_reset = params.monochrome_logs ? '' : "\033[0m" 28 | def c_white = params.monochrome_logs ? '' : "\033[0;37m" 29 | def c_red = params.monochrome_logs ? '' : "\033[1;91m" 30 | def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" 31 | if (params.hostnames) { 32 | def hostname = "hostname".execute().text.trim() 33 | params.hostnames.each { prof, hnames -> 34 | hnames.each { hname -> 35 | if (hostname.contains(hname) && !workflow.profile.contains(prof)) { 36 | log.error "====================================================\n" + 37 | " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + 38 | " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + 39 | " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + 40 | "============================================================" 41 | } 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /workflows/bbduk.nf: -------------------------------------------------------------------------------- 1 | TRIMENV=get_always('TRIMMINGENV') 2 | TRIMBIN=get_always('TRIMMINGBIN') 3 | 4 | TRIMPARAMS = get_always('bbduk_params_TRIM') ?: '' 5 | //int cores = min(THREADS,4) 6 | //TRIMMING PROCESSES 7 | 8 | process trim{ 9 | conda "$TRIMENV"+".yaml" 10 | cpus 4//cores 11 | //validExitStatus 0,1 12 | 13 | publishDir "${workflow.workDir}/../" , mode: 'link', 14 | saveAs: {filename -> 15 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/_val_\d{1}|_trimmed|_dedup/,"")}_trimmed.fastq.gz" 16 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/.fastq.gz/,"")}_trimming_report.txt" 17 | else if (filename.indexOf(".log") >0) "LOGS/${COMBO}/${CONDITION}/TRIMMING/${file(filename).getSimpleName()}.log" 18 | else null 19 | } 20 | 21 | input: 22 | path reads 23 | 24 | output: 25 | path "*_trimmed.fastq.gz", emit: trim 26 | path "*trimming_report.txt", emit: rep 27 | 28 | script: 29 | if (PAIRED == 'paired'){ 30 | r1 = reads[0] 31 | r2 = reads[1] 32 | o = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 33 | p = file(r2).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 34 | r = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 35 | """ 36 | $TRIMBIN $TRIMPARAMS t=${task.cpus} in1=$r1 in2=$r2 out1=$o out2=$p &> $r 37 | """ 38 | } 39 | else{ 40 | o = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 41 | r = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 42 | """ 43 | $TRIMBIN $TRIMPARAMS t=${task.cpus} in=$r1 out=$o &> $r 44 | """ 45 | } 46 | } 47 | 48 | workflow TRIMMING{ 49 | take: 50 | collection 51 | 52 | main: 53 | //check = collection.toList() 54 | if ( PREDEDUP == 'enabled' ){ // && !check.contains('MONSDA.log')){ 55 | trim(collection) 56 | }else { 57 | if (PAIRED == 'paired'){ 58 | trim(samples_ch.collate(2)) 59 | } else{ 60 | trim(samples_ch.collate(1)) 61 | } 62 | } 63 | 64 | emit: 65 | trimmed = trim.out.trim 66 | report = trim.out.rep 67 | } 68 | -------------------------------------------------------------------------------- /workflows/fastp.nf: -------------------------------------------------------------------------------- 1 | TRIMENV=get_always('TRIMMINGENV') 2 | TRIMBIN=get_always('TRIMMINGBIN') 3 | 4 | TRIMPARAMS = get_always('fastp_params_TRIM') ?: '' 5 | //int cores = min(THREADS,4) 6 | //TRIMMING PROCESSES 7 | 8 | process trim{ 9 | conda 
"$TRIMENV"+".yaml" 10 | cpus 4//cores 11 | //validExitStatus 0,1 12 | 13 | publishDir "${workflow.workDir}/../" , mode: 'link', 14 | saveAs: {filename -> 15 | if (filename.indexOf("_trimmed.fastq.gz") > 0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/_val_\d{1}|_trimmed|_dedup/,"")}_trimmed.fastq.gz" 16 | else if (filename.indexOf("report.txt") >0) "TRIMMED_FASTQ/${COMBO}/${CONDITION}/${file(filename).getSimpleName().replaceAll(/.fastq.gz/,"")}_trimming_report.txt" 17 | else if (filename.indexOf(".log") >0) "LOGS/${COMBO}/${CONDITION}/TRIMMING/${file(filename).getSimpleName()}.log" 18 | else null 19 | } 20 | 21 | input: 22 | path reads 23 | 24 | output: 25 | path "*_trimmed.fastq.gz", emit: trim 26 | path "*trimming_report.txt", emit: rep 27 | 28 | script: 29 | if (PAIRED == 'paired'){ 30 | r1 = reads[0] 31 | r2 = reads[1] 32 | o = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 33 | p = file(r2).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 34 | r = file(r1).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 35 | """ 36 | $TRIMBIN $TRIMPARAMS --thread ${task.cpus} --in1 $r1 --in2 $r2 --out1 $o --out2 $p &> $r 37 | """ 38 | } 39 | else{ 40 | o = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimmed.fastq.gz" 41 | r = file(reads).getSimpleName().replaceAll(/_dedup/,"").replaceAll(/.fastq.gz/,"")+"_trimming_report.txt" 42 | """ 43 | $TRIMBIN $TRIMPARAMS --threads ${task.cpus} --i $r1 --o $o &> $r 44 | """ 45 | } 46 | } 47 | 48 | workflow TRIMMING{ 49 | take: 50 | collection 51 | 52 | main: 53 | //check = collection.toList() 54 | if ( PREDEDUP == 'enabled' ){ // && !check.contains('MONSDA.log')){ 55 | trim(collection) 56 | }else { 57 | if (PAIRED == 'paired'){ 58 | trim(samples_ch.collate(2)) 59 | } else{ 60 | trim(samples_ch.collate(1)) 61 | } 62 | } 63 | 64 | emit: 65 | trimmed = trim.out.trim 66 | report = trim.out.rep 67 | } 68 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | MONSDA 2 | ====== 3 | 4 | .. toctree:: 5 | :hidden: 6 | :maxdepth: 2 7 | :caption: GETTING STARTED 8 | 9 | source/installation 10 | source/first 11 | 12 | .. toctree:: 13 | :hidden: 14 | :maxdepth: 2 15 | :caption: PREPARING YOUR PROJECT 16 | 17 | source/preparation 18 | source/configurator 19 | 20 | .. toctree:: 21 | :hidden: 22 | :maxdepth: 2 23 | :caption: EXECUTING MONSDA 24 | 25 | source/runsmk 26 | source/cluster 27 | 28 | 29 | .. toctree:: 30 | :hidden: 31 | :maxdepth: 2 32 | :caption: TUTORIAL 33 | 34 | source/tutorial 35 | 36 | 37 | .. toctree:: 38 | :hidden: 39 | :maxdepth: 2 40 | :caption: WORKFLOW AND TOOL OVERVIEW 41 | 42 | source/workflows 43 | 44 | 45 | .. toctree:: 46 | :hidden: 47 | :maxdepth: 2 48 | :caption: DETAILS 49 | 50 | source/wrapper 51 | source/conditiontree 52 | source/config 53 | 54 | .. toctree:: 55 | :hidden: 56 | :maxdepth: 2 57 | :caption: CONTRIBUTE 58 | 59 | source/integrate 60 | source/contribute 61 | 62 | 63 | Welcome to **MONSDA**, Modular Organizer of Nextflow and Snakemake driven hts Data Analysis 64 | 65 | Automizing HTS analysis from data download, preprocessing and mapping to postprocessing/analysis and track generation centered on a single config file. 
**MONSDA** can create **Snakemake** and **Nextflow** workflows centered on a user friendly, sharable **Json** config file and reproducible subworkflows. These workflows can either be saved to disk for manual inspection and execution or automatically executed. 66 | 67 | For details on **Snakemake** and **Nextflow** and their features please refer to the corresponding Snakemake_ or Nextflow_ documentation. 68 | 69 | .. _Snakemake: https://Snakemake.readthedocs.io/en/stable/tutorial/tutorial.html 70 | .. _Nextflow: https://www.Nextflow.io/docs/latest/index.html 71 | 72 | In general it is necessary to write a configuration file containing workflows to execute, information on paths, files to process and settings beyond default for mapping tools and others. 73 | The template on which **MONSDA** is based on can be found in the **config** directory. 74 | 75 | For **MONSDA** to be as FAIR as possible, one needs to use **conda** or the alternative **mamba**. For details on either please refer to the corresponding conda_ or mamba_ manual. 76 | 77 | .. _conda: https://docs.conda.io/en/latest/ 78 | .. _mamba: https://mamba.readthedocs.io/en/latest/ 79 | 80 | This workflow organizer makes heavy use of **conda** and especially the bioconda_ channel. 81 | 82 | .. _bioconda: https://bioconda.github.io 83 | -------------------------------------------------------------------------------- /scripts/Analysis/PreprocessPeaks.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | #### use things ### 4 | use strict; 5 | use warnings; 6 | use autodie; 7 | use PerlIO::gzip; 8 | use Getopt::Long qw( :config posix_default bundling no_ignore_case ); 9 | use Cwd; 10 | ### use own modules 11 | use FindBin::Real qw(Bin); # locate this script 12 | use lib Bin() . "/../lib"; 13 | use Collection; 14 | 15 | my ( $dir, $odir, $peakfile, $filter); 16 | my $VERBOSE=0; 17 | pod2usage(-verbose => 0) 18 | unless GetOptions( 19 | "dir|d=s" => \$dir, 20 | "odir|o=s" => \$odir, 21 | "peak|p=s" => \$peakfile, 22 | "filter|f=s" => \$filter, 23 | "help|h" => sub{pod2usage(-verbose => 1)}, 24 | "man|m" => sub{pod2usage(-verbose => 2)}, 25 | "verbose" => sub{ $VERBOSE++ } 26 | ); 27 | 28 | #my $pwd = cwd(); 29 | $dir = cwd() unless ($dir); 30 | $odir =~ s/$odir/\Q$odir\E/g if($odir); 31 | $odir = "$dir"."\/Subpeaks" unless ($odir); 32 | $filter = qr(\Q$filter\E) if($filter); 33 | 34 | my $pid = $$; 35 | (my $job = `cat /proc/$pid/cmdline`)=~ s/\0/ /g; 36 | print STDERR $job,"\n"; 37 | 38 | chdir ("$dir"); 39 | 40 | #Read beds; 41 | push my @beds,split(',',$peakfile); 42 | 43 | print STDERR "Processing Bed File and annotating profile\n"; 44 | 45 | #parse bedfile 46 | my ($unique,$chl) = Collection::parse_bedgraph(\@beds);#, $unique, $chl); 47 | 48 | print STDERR "Printing bed with profile\n"; 49 | 50 | foreach my $pk (keys %{$unique}){ 51 | my @tempuni = split(/\_/,$pk); 52 | push @tempuni , split(/\_/,$unique->{$pk}); 53 | (my $chromosome = $tempuni[0])=~ s/=/\_/g; 54 | $chromosome =~ s/(:)+/_/g; 55 | my $start = $tempuni[1]; 56 | my $end = $tempuni[2]; 57 | my $strand = $tempuni[3]; 58 | my $name = $tempuni[4]; 59 | my $score = $tempuni[5]; 60 | my $summit = $tempuni[6]; 61 | my $rest = $tempuni[7]; 62 | 63 | if ($filter){ 64 | next if $name !~ /$filter/; 65 | } 66 | 67 | $strand = '.' 
if $strand eq 'u'; 68 | 69 | my $profile; 70 | for ($start..$end-1){ 71 | $profile->{$_}=$score; 72 | } 73 | my $area; 74 | my @tmp; 75 | for my $loci (sort{$a <=> $b} keys %{$profile}){ 76 | # push @tmp, join(':',$loci,$profile->{$loci}); 77 | $summit = $profile->{$loci} if ( $summit < $profile->{$loci} ); 78 | $area+=$profile->{$loci}; 79 | } 80 | if ($rest eq 'undef'){ 81 | $rest = $area; 82 | } 83 | else{ 84 | $rest=join('\t',$area,$rest); 85 | } 86 | # my $peakprofile = join("|",@tmp); 87 | my $peakprofile = ($end-$start).':'.$score; #Changing to more sparse peak profile 88 | print STDOUT "$chromosome\t$start\t$end\t$peakprofile\t$summit\t$strand\t$rest\n"; 89 | } 90 | -------------------------------------------------------------------------------- /workflows/manipulate_genome.nf: -------------------------------------------------------------------------------- 1 | process UnzipGenome{ 2 | conda "samtools.yaml" 3 | cpus 1 4 | cache 'lenient' 5 | //validExitStatus 0,1 6 | 7 | publishDir "${workflow.workDir}/../" , mode: 'link', 8 | saveAs: {filename -> 9 | if (filename.indexOf(".fa.fai") > 0) "${REFDIR}/${file(filename).getName()}" 10 | else if (filename.indexOf(".fa") > 0) "${REFDIR}/${file(filename).getName()}" 11 | else if (filename.indexOf(".chrom.sizes") > 0) "${REFDIR}/${file(filename).getName()}" 12 | else if (filename == "log") "LOGS/${SCOMBO}/${COMBO}_indexfa.log" 13 | } 14 | 15 | input: 16 | path ref 17 | 18 | output: 19 | path "*.fa", emit: unzipped 20 | path "*.fa.fai", emit: index 21 | path "*.chrom.sizes", emit: chromsize 22 | path "log", emit: log 23 | 24 | script: 25 | fa = ref.getSimpleName()+".fa" 26 | fai = ref.getSimpleName()+".fa.fai" 27 | cs = ref.getSimpleName()+".chrom.sizes" 28 | 29 | """ 30 | zcat $ref |perl -F'\\t' -wane 'if(\$_ =~ /^>/){{chomp(\$F[0]);print \"\\n\".\$F[0].\"\\n\"}} else{{(\$line=\$_)=~s/\\r[\\n]*/\\n/gm; chomp(\$line=\$_); print \$line}}' |tail -n+2 > $fa && $BINS/Preprocessing/indexfa.sh $fa 2> log && cut -f1,2 $fai > $cs 31 | """ 32 | } 33 | 34 | 35 | process UnzipGenome_no_us{ 36 | conda "samtools.yaml" 37 | cpus 1 38 | cache 'lenient' 39 | //validExitStatus 0,1 40 | 41 | publishDir "${workflow.workDir}/../" , mode: 'link', 42 | saveAs: {filename -> 43 | if (filename.indexOf("_us.fa.fai") > 0) "${REFDIR}/${file(filename).getName()}" 44 | else if (filename.indexOf("_us.fa") > 0) "${REFDIR}/${file(filename).getName()}" 45 | else if (filename.indexOf("_us.chrom.sizes") > 0) "${REFDIR}/${file(filename).getName()}" 46 | else if (filename == "log") "LOGS/${SCOMBO}/${COMBO}_indexfa_us.log" 47 | } 48 | 49 | input: 50 | path ref 51 | 52 | output: 53 | path "*.fa", emit: unzipped 54 | path "*.fa.fai", emit: index 55 | path "*.chrom.sizes", emit: chromsize 56 | path "log", emit: log 57 | 58 | script: 59 | fa = ref.getSimpleName()+"_us.fa" 60 | fai = ref.getSimpleName()+"_us.fa.fai" 61 | cs = ref.getSimpleName()+"_us.chrom.sizes" 62 | 63 | """ 64 | zcat $ref |perl -F'\\t' -wane 'if(\$_ =~ /^>/){{\$F[0] = \$F[0] =~ /^>chr/ ? 
\$F[0] : \">chr\".substr(\$F[0],1) =~ s/_/./g;chomp(\$F[0]);print \"\\n\".\$F[0].\"\\n\"}} else{{(\$line=\$_)=~s/\\r[\\n]*/\\n/gm; chomp(\$line=\$_); print \$line}}' |tail -n+2 > $fa && $BINS/Preprocessing/indexfa.sh $fa 2> log && cut -f1,2 $fai > $cs 65 | """ 66 | } -------------------------------------------------------------------------------- /scripts/Universal/countCCA.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use strict; 3 | use warnings; 4 | use PerlIO::gzip; 5 | 6 | my $in = shift; #bam file 7 | my $in2 = shift; #cluster sequences 8 | 9 | 10 | open BAM, "samtools view $in |"; 11 | open SEQ, "<:gzip(autopop)", "$in2" or die "can t open $in2\n"; 12 | 13 | 14 | my %hash =(); 15 | my $c1 =0; 16 | my $c2 =0; 17 | my $c3 =0; 18 | my $c4 =0; 19 | my $c5 = 0; 20 | 21 | 22 | while(){ 23 | chomp $_; 24 | 25 | if($_ =~m /^>(.*)/){ 26 | my $id = $1; 27 | my $seq = ; 28 | chomp $seq; 29 | $hash{$id} = $seq; 30 | } 31 | } 32 | 33 | my $all; 34 | while(){ 35 | $all++; 36 | chomp $_; 37 | 38 | my @line = split(/\t/,$_); 39 | 40 | my $cluster = $line[2]; #Reference sequence NAME 41 | my $start = $line[3]; #1-based leftmost mapping POSition 42 | my $seq = $line[9]; #segment SEQuence (read) 43 | my $seqL = length($seq); 44 | 45 | my $cigar = $line[5]; 46 | my @array = split(/(M|I|D|N|S|H|P|X)/,$cigar); 47 | 48 | #sum number of deletions or sub number of insertions from the read length 49 | my $del = 0; 50 | my $ins = 0; 51 | for(my $i=0; $i < scalar @array ; $i++){ 52 | if($array[$i] eq "D"){ 53 | $del += $array[$i-1]; 54 | } 55 | if($array[$i] eq "I"){ 56 | $ins += $array[$i-1]; 57 | } 58 | } 59 | 60 | $seqL += $del; 61 | $seqL -= $ins; 62 | 63 | my $len = length($hash{$cluster}); #length of the cluster seq with CCACCA 64 | my $tailCCA = substr $seq, -3; 65 | my $tailCC = substr $seq, -2; 66 | my $tailC = substr $seq, -1; 67 | my $tailCCACCA = substr $seq, -6; 68 | 69 | ### only reads with CCA and mapping position at 3' end -3 (CCA) position 70 | if(($start -1 + $seqL) == ($len - 3) && $tailCCA eq "CCA"){ 71 | $c1++; 72 | } 73 | ### only reads with CCACCA and mapping position at 3' end position 74 | elsif(($start -1 + $seqL) == ($len) && $tailCCACCA eq "CCACCA"){ 75 | $c2++; 76 | } 77 | ### only reads with no CCA and mapping position at 3' end -6 (CCACCA) position 78 | elsif(($start -1 + $seqL) == ($len - 6)){ 79 | $c3++; 80 | } 81 | ### only reads with C and mapping position at 3' end -5 (CACCA) position 82 | elsif(($start -1 + $seqL) == ($len - 5) && $tailC eq "C"){ 83 | $c4++; 84 | } 85 | ### only reads with CC and mapping position at 3' end -5 (ACCA) position 86 | elsif(($start -1 + $seqL) == ($len - 4) && $tailCC eq "CC"){ 87 | $c5++; 88 | } 89 | } 90 | 91 | 92 | print join ("\t", "all", "CCACCA", "CCA", "CC", "C", "no"); 93 | print "\n"; 94 | print join ("\t", $all, $c2, $c1, $c5, $c4, $c3); 95 | print "\n"; 96 | 97 | -------------------------------------------------------------------------------- /workflows/bbduk.smk: -------------------------------------------------------------------------------- 1 | TRIMBIN, TRIMENV = env_bin_from_config(config,'TRIMMING') 2 | 3 | if paired == 'paired': 4 | rule bbduk_trim: 5 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 6 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if 
x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 7 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_R1_val_1.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R1_dedup_val_1.fq.gz", 8 | o2 = "TRIMMED_FASTQ/{combo}/{file}_R2_val_2.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R2_dedup_val_2.fq.gz" 9 | log: "LOGS/{combo}/{file}_trim.log" 10 | conda: ""+TRIMENV+".yaml" 11 | threads: MAXTHREAD 12 | params: odir = lambda wildcards, output:os.path.dirname(output.o1), 13 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV).get('TRIM', ""), 14 | trim=TRIMBIN 15 | shell: "{params.trim} t={threads} in1={input.r1} in2={input.r2} out1={output.o1} out2={output.o2} {params.tpara}" 16 | 17 | rule bbduk_rename: 18 | input: o1 = rules.bbduk_trim.output.o1, 19 | o2 = rules.bbduk_trim.output.o2 20 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 21 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 22 | conda: ""+TRIMENV+".yaml" 23 | threads: 1 24 | shell: "mv {input.o1} {output.r1} && mv {input.o2} {output.r2}" 25 | else: 26 | rule bbduk_trim: 27 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 28 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_dedup_trimmed.fq.gz" 29 | log: "LOGS/{combo}/{file}_trim.log" 30 | conda: ""+TRIMENV+".yaml" 31 | threads: MAXTHREAD 32 | params: odir = lambda wildcards, output: os.path.dirname(output.o1), 33 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV).get('TRIM',""), 34 | trim = TRIMBIN, 35 | shell: "{params.trim} t={threads} in={input.r1} out={output.o1} {params.tpara}" 36 | 37 | rule bbduk_rename: 38 | input: o1 = rules.bbduk_trim.output.o1 39 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 40 | conda: ""+TRIMENV+".yaml" 41 | threads: 1 42 | shell: "mv {input.o1} {output.r1}" 43 | -------------------------------------------------------------------------------- /workflows/mapping.smk: -------------------------------------------------------------------------------- 1 | rule sortsam: 2 | input: mapps = rules.mapping.output.mapped 3 | output: sortedsam = report("MAPPED/{combo}/{file}_mapped_sorted.sam.gz", category="SORTING"), 4 | tmphead = temp("MAPPED/{combo}/{file}_mapped_header.gz"), 5 | tmpfile = temp("TMP/{combo}/{file}") 6 | log: "LOGS/{combo}/{file}/sortsam.log" 7 | conda: "samtools.yaml" 8 | threads: MAXTHREAD 9 | priority: 100 10 | params: linkto = lambda wildcards, output: os.path.basename(output.sortedsam), 11 | sortmem = lambda wildcards, threads: int(30/MAXTHREAD*threads) 12 | shell: "set +o pipefail;samtools view -H {input.mapps}|grep -P '^@HD' |pigz -p {threads} -f > {output.tmphead} ; samtools view -H {input.mapps}|grep -P '^@SQ'|sort -t$'\t' -k1,1 -k2,2V |pigz -p {threads} -f >> {output.tmphead} ; samtools view -H {input.mapps}|grep -P '^@RG'|pigz -p {threads} -f >> {output.tmphead} ; samtools view -H {input.mapps}|grep -P '^@PG'|pigz -p {threads} -f >> {output.tmphead} ; export LC_ALL=C;samtools view -h {input.mapps} | grep -v \"^@\"|sort --parallel={threads} -S {params.sortmem}% -T TMP -t$'\t' -k3,3V -k4,4n - |pigz -p {threads} -f > {output.tmpfile} ; cat {output.tmphead} {output.tmpfile} > {output.sortedsam} 2> {log}"# && rm -f {input.mapps} && 
touch {input.mapps}" 13 | 14 | rule sam2bam: 15 | input: sortedsam = rules.sortsam.output.sortedsam 16 | output: bam = report("MAPPED/{combo}/{file}_mapped_sorted.bam", category="2BAM"), 17 | bamindex = "MAPPED/{combo}/{file}_mapped_sorted.bam.bai" 18 | log: "LOGS/{combo}/{file}/sam2bam.log" 19 | conda: "samtools.yaml" 20 | threads: MAXTHREAD 21 | params: bins = BINS 22 | shell: "zcat {input.sortedsam} | samtools view -bS - > {output.bam} && samtools index {output.bam} 2> {log}" 23 | 24 | rule uniqsam: 25 | input: sortedsam = rules.sortsam.output.sortedsam, 26 | bam = rules.sam2bam.output 27 | output: uniqsam = report("MAPPED/{combo}/{file}_mapped_sorted_unique.sam.gz", category="UNIQUE") 28 | log: "LOGS/{combo}/{file}/uniqsam.log" 29 | conda: "base.yaml" 30 | threads: MAXTHREAD 31 | params: bins=BINS 32 | shell: "{params.bins}/Shells/UniqueSam_woPicard.sh {input.sortedsam} {output.uniqsam} {threads} 2> {log}" 33 | 34 | rule sam2bamuniq: 35 | input: uniqsam = rules.uniqsam.output, 36 | bam = rules.sam2bam.output 37 | output: uniqbam = report("MAPPED/{combo}/{file}_mapped_sorted_unique.bam", category="2BAM"), 38 | uniqbamindex = "MAPPED/{combo}/{file}_mapped_sorted_unique.bam.bai" 39 | log: "LOGS/{combo}/{file}/sam2bamuniq.log" 40 | conda: "samtools.yaml" 41 | threads: MAXTHREAD 42 | priority: 50 43 | params: bins = BINS 44 | shell: "zcat {input.uniqsam} | samtools view -bS - > {output.uniqbam} && samtools index {output.uniqbam} 2> {log}" 45 | -------------------------------------------------------------------------------- /workflows/fastp.smk: -------------------------------------------------------------------------------- 1 | TRIMBIN, TRIMENV = env_bin_from_config(config,'TRIMMING') 2 | 3 | if paired == 'paired': 4 | rule fastp_trim: 5 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 6 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 7 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_R1_val_1.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R1_dedup_val_1.fq.gz", 8 | o2 = "TRIMMED_FASTQ/{combo}/{file}_R2_val_2.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R2_dedup_val_2.fq.gz" 9 | log: "LOGS/{combo}/{file}_trim.log" 10 | conda: ""+TRIMENV+".yaml" 11 | threads: MAXTHREAD 12 | params: odir = lambda wildcards, output:os.path.dirname(output.o1), 13 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV).get('TRIM', ""), 14 | trim=TRIMBIN 15 | shell: "{params.trim} --thread {threads} --in1 {input.r1} --in2 {input.r2} --out1 {output.o1} --out2 {output.o2} {params.tpara}" 16 | 17 | rule fastp_rename: 18 | input: o1 = rules.fastp_trim.output.o1, 19 | o2 = rules.fastp_trim.output.o2 20 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 21 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 22 | conda: ""+TRIMENV+".yaml" 23 | threads: 1 24 | shell: "mv {input.o1} {output.r1} && mv {input.o2} {output.r2}" 25 | else: 26 | rule fastp_trim: 27 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 28 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fq.gz" if not 
prededup else "TRIMMED_FASTQ/{combo}/{file}_dedup_trimmed.fq.gz" 29 | log: "LOGS/{combo}/{file}_trim.log" 30 | conda: ""+TRIMENV+".yaml" 31 | threads: MAXTHREAD 32 | params: odir = lambda wildcards, output: os.path.dirname(output.o1), 33 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV).get('TRIM',""), 34 | trim = TRIMBIN, 35 | shell: "{params.trim} --thread {threads} -i {input.r1} -o {output.o1} {params.tpara}" 36 | 37 | rule fastp_rename: 38 | input: o1 = rules.fastp_trim.output.o1 39 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 40 | conda: ""+TRIMENV+".yaml" 41 | threads: 1 42 | shell: "mv {input.o1} {output.r1}" 43 | -------------------------------------------------------------------------------- /scripts/Analysis/GOA.R: -------------------------------------------------------------------------------- 1 | #source("http://bioconductor.org/biocLite.R") 2 | #biocLite("topGO") 3 | #if (!requireNamespace("BiocManager", quietly=TRUE)) 4 | # install.packages("BiocManager") 5 | #BiocManager::install("topGO") 6 | 7 | suppressPackageStartupMessages({ 8 | require(topGO) 9 | require(Rgraphviz) 10 | }) 11 | 12 | #define notin 13 | `%notin%` = Negate(`%in%`) 14 | 15 | args <- commandArgs(TRUE) 16 | background <- args[1] 17 | test <- args[2] 18 | GOs <- args[3] 19 | 20 | #run GO 21 | geneID2GO <- readMappings(GOs, sep = "\t", IDsep = ",") 22 | expressedGenes <- read.table(background,sep="\t") 23 | Genes <- read.table(test,sep="\t") 24 | GenesOI <- Genes$V1 25 | geneList <- factor(as.integer(expressedGenes$V1 %in% GenesOI)) 26 | names(geneList) <- expressedGenes$V1 27 | 28 | GOdata <- new("topGOdata", 29 | ontology = "MF", 30 | allGenes = geneList, 31 | geneSel = GenesOI, 32 | annot = annFUN.gene2GO, # the new annotation function 33 | gene2GO = geneID2GO) ## the gene ID to GOs dataset 34 | 35 | test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") 36 | resultFisher <- getSigGroups(GOdata, test.stat) 37 | pvalFis <- score(resultFisher) 38 | resultWeight <- getSigGroups(GOdata, test.stat) 39 | pvalWeight <- score(resultWeight, whichGO = names(pvalFis)) 40 | cor(pvalFis, pvalWeight) 41 | geneData(resultWeight) 42 | allRes <- GenTable(GOdata, classic = resultFisher, weight = resultWeight, orderBy = "weight", ranksOf = "classic", topNodes = 20) 43 | write.table(allRes, file = paste("TopGO_MF",test,sep="_"), append = FALSE, quote = TRUE, sep = "\t", eol = "\n", na = "NA", dec = ".", row.names = TRUE, col.names = TRUE, qmethod = "double") 44 | printGraph(GOdata, resultFisher, firstSigNodes = 10, fn.prefix = paste("TopGO_MFGraph",test,sep="_"), useInfo = "all", pdfSW = TRUE) 45 | 46 | GOdata <- new("topGOdata", 47 | ontology = "BP", 48 | allGenes = geneList, 49 | geneSel = GenesOI, 50 | annot = annFUN.gene2GO, # the new annotation function 51 | gene2GO = geneID2GO) ## the gene ID to GOs dataset 52 | 53 | test.stat <- new("classicCount", testStatistic = GOFisherTest, name = "Fisher test") 54 | resultFisher <- getSigGroups(GOdata, test.stat) 55 | pvalFis <- score(resultFisher) 56 | resultWeight <- getSigGroups(GOdata, test.stat) 57 | pvalWeight <- score(resultWeight, whichGO = names(pvalFis)) 58 | cor(pvalFis, pvalWeight) 59 | geneData(resultWeight) 60 | allRes <- GenTable(GOdata, classic = resultFisher, weight = resultWeight, orderBy = "weight", ranksOf = "classic", topNodes = 20) 61 | write.table(allRes, file = paste("TopGO_BP",test,sep="_"), append = FALSE, quote = TRUE, sep = "\t", eol = "\n", na = "NA", dec = ".", 
row.names = TRUE, col.names = TRUE, qmethod = "double") 62 | printGraph(GOdata, resultFisher, firstSigNodes = 10, fn.prefix = paste("TopGO_BPGraph",test,sep="_"), useInfo = "all", pdfSW = TRUE) 63 | 64 | 65 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath(".")) 17 | sys.path.insert(0, os.path.abspath("..")) 18 | sys.path.insert(0, os.path.abspath("../MONSDA")) 19 | 20 | from MONSDA import _version 21 | 22 | __version__ = _version.get_versions()["version"] 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = "MONSDA" 27 | copyright = "2020, Joerg Fallmann" 28 | author = "Joerg Fallmann" 29 | 30 | # The full version, including alpha/beta/rc tags 31 | release = __version__ 32 | 33 | # -- General configuration --------------------------------------------------- 34 | # Master file to be generated 35 | 36 | master_doc = "index" 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | "sphinx.ext.autodoc", 43 | "sphinx.ext.doctest", 44 | "sphinx.ext.mathjax", 45 | "sphinx.ext.napoleon", 46 | "sphinx.ext.graphviz", 47 | "recommonmark", 48 | "sphinx_rtd_theme", 49 | ] 50 | 51 | # Add any paths that contain templates here, relative to this directory. 52 | templates_path = ["source/_templates"] 53 | 54 | # List of patterns, relative to source directory, that match files and 55 | # directories to ignore when looking for source files. 56 | # This pattern also affects html_static_path and html_extra_path. 57 | exclude_patterns = [] 58 | 59 | 60 | # -- Options for HTML output ------------------------------------------------- 61 | 62 | # The theme to use for HTML and HTML Help pages. See the documentation for 63 | # a list of builtin themes. 64 | # 65 | # html_theme = 'classic' 66 | html_theme = "sphinx_rtd_theme" 67 | 68 | html_theme_options = { 69 | # Toc options 70 | "collapse_navigation": True, 71 | "sticky_navigation": True, 72 | "navigation_depth": 4, 73 | "includehidden": True, 74 | "titles_only": False, 75 | } 76 | 77 | # Add any paths that contain custom static files (such as style sheets) here, 78 | # relative to this directory. They are copied after the builtin static files, 79 | # so a file named "default.css" will overwrite the builtin "default.css". 
80 | html_static_path = ["source/_static"] 81 | pygments_style = "sphinx" 82 | 83 | 84 | # Add custom css to prevent tables with wide side-scrolling 85 | def setup(app): 86 | app.add_css_file("css/custom.css") 87 | -------------------------------------------------------------------------------- /workflows/fastqc_raw.smk: -------------------------------------------------------------------------------- 1 | QCBIN, QCENV = env_bin_from_config(config, 'QC') 2 | 3 | if paired == 'paired': 4 | log.info('Running paired mode QC') 5 | rule qc_raw: 6 | input: r1 = "FASTQ/{rawfile}_{read}.fastq.gz" 7 | output: o1 = report("QC{combo}{rawfile}_{read}_fastqc.zip") 8 | log: "LOGS{combo}{rawfile}_fastqc_{read}_raw.log" 9 | conda: ""+QCENV+".yaml" 10 | threads: MAXTHREAD 11 | params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('QC', "") 12 | shell: "OUT=$(dirname {output.o1});fastqc --quiet -o $OUT -t {threads} --noextract {params.qpara} -f fastq {input.r1} 2> {log}" 13 | 14 | rule multiqc: 15 | input: expand(rules.qc_raw.output.o1, rawfile=list(SAMPLES), read=['R1','R2'], combo=combo) 16 | output: html = report("QC/Multi{combo}{condition}/multiqc_report.html", category="QC"), 17 | tmp = temp("QC/Multi{combo}{condition}/tmp"), 18 | lst = "QC/Multi{combo}{condition}/qclist_raw.txt" 19 | log: "LOGS{combo}{condition}_multiqc_raw.log" 20 | conda: ""+QCENV+".yaml" 21 | threads: 1 22 | params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "") 23 | shell: "OUT=$(dirname {output.html}); for i in {input};do echo $(dirname \"${{i}}\") >> {output.tmp};done; cat {output.tmp} |sort -u > {output.lst};export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o $OUT -l {output.lst} 2> {log}" 24 | 25 | else: 26 | rule qc_raw: 27 | input: r1 = "FASTQ/{rawfile}.fastq.gz" 28 | output: o1 = report("QC{combo}{rawfile}_fastqc.zip", category="QC") 29 | log: "LOGS{combo}{rawfile}_fastqc_raw.log" 30 | conda: ""+QCENV+".yaml" 31 | threads: MAXTHREAD 32 | params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('QC', "") 33 | shell: "OUT=$(dirname {output.o1});fastqc --quiet -o $OUT -t {threads} --noextract {params.qpara} -f fastq {input.r1} 2> {log}" 34 | 35 | rule multiqc: 36 | input: expand(rules.qc_raw.output.o1, rawfile=list(SAMPLES), combo=combo) 37 | output: html = report("QC/Multi{combo}{condition}/multiqc_report.html", category="QC"), 38 | tmp = temp("QC/Multi{combo}{condition}/tmp"), 39 | lst = "QC/Multi{combo}{condition}/qclist_raw.txt" 40 | log: "LOGS{combo}{condition}_multiqc_raw.log" 41 | conda: ""+QCENV+".yaml" 42 | threads: 1 43 | params: qpara = lambda wildcards: tool_params(SAMPLES[0], None, config, 'QC', QCENV)['OPTIONS'].get('MULTI', "") 44 | shell: "OUT=$(dirname {output.html}); for i in {input};do echo $(dirname \"${{i}}\") >> {output.tmp};done; cat {output.tmp} |sort -u > {output.lst};export LC_ALL=en_US.utf8; export LC_ALL=C.UTF-8; multiqc -f {params.qpara} --exclude picard --exclude gatk -k json -z -s -o $OUT -l {output.lst} 2> {log}" 45 | -------------------------------------------------------------------------------- /docs/source/wrapper.rst: -------------------------------------------------------------------------------- 1 | Wrapping Workflows 2 | ================== 3 | 4 | In general **MONSDA** is *Python3* software, that wraps workflows by assembling subworkflows or single tools from `.smk` or `.nf` 
templates. The idea here is that tools and subworkflows for similar tasks are designed in a way that starts from the same input and results in the same output. This is not only true for single workflow steps which can be performed by multiple tools, but also for the wrapped workflow management systems (WMS). In principle, output generated in `Nextflow` mode should be suitable as input for `Snakemake` and vice versa. This means that, for example, mapping output generated in `Nextflow` mode can be used as input for *DE* analysis in `Snakemake` mode, while both work from the same **config.json**. 5 | 6 | As Snakemake is also written in *Python*, wrapping workflows is similar to the built-in way of submodule assembly, although we take care that submodules for the same task remain interchangeable. Wrapping Nextflow is slightly different, as `MONSDA` has to assemble *Groovy* text blocks; this makes no difference to the end user, but requires translating the configuration from the config file into Nextflow-parsable command lines. However, the idea of creating interchangeable subworkflows or tool-specific code fragments stays the same. 7 | 8 | Independently of the wrapped WMS, workflows are split internally into three independent stages. *PREPROCESSING* includes all workflow steps that generate or manipulate FASTQ files to make them available to the *PROCESSING* stage. This includes download of files from SRA, basecalling with Guppy and pre-quality-control, so all workflow steps that do not require identical input formats but lead to similar output. 9 | 10 | *PROCESSING* starts from FASTQ files and includes trimming, deduplication, mapping and quality control for all subprocesses. 11 | 12 | *POSTPROCESSING* builds upon *PROCESSING* output and includes quantification, differential expression analysis on gene, transcript and exon level, generation of tracks for UCSC or other genome browsers, peak finding and circular RNA identification. In contrast to *PROCESSING* workflows, these steps do not require output to be of similar format but are able to work from the same input. 13 | 14 | In case dedicated workflows need to be established, as is for example the case for cyPhyRNA-Seq, the main idea is to split all preprocessing and processing steps into units that remain interchangeable, and to deliver dedicated post-processing subworkflows which can work on their output. In the mentioned example we have quality control, trimming, mapping and deduplication embedded in standard workflows, and dedicated, specific postprocessing of mapped reads wrapped in the PEAKS postprocessing step. 15 | 16 | For new workflows, we aim to split those into subunits as small as possible, to make subworkflows available for other pipelines, and to add dedicated parts as postprocessing to currently established categories. In case new categories need to be defined, please contact us to discuss how this can be embedded.
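
To make the three stages more concrete, the following sketch shows how enabled workflow steps map onto them in a **config.json**; the key name, the step list and the pseudo-comment entry are illustrative assumptions here, the actual schema is described in the configuration chapter:

.. code-block:: json

    {
        "WORKFLOWS": "FETCH, QC, TRIMMING, MAPPING, DE, PEAKS",
        "_stages": "FETCH and raw QC run as PREPROCESSING, TRIMMING/MAPPING/QC as PROCESSING, DE and PEAKS as POSTPROCESSING"
    }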
17 | 18 | -------------------------------------------------------------------------------- /workflows/cutadapt.smk: -------------------------------------------------------------------------------- 1 | TRIMBIN, TRIMENV = env_bin_from_config(config,'TRIMMING') 2 | 3 | if paired == 'paired': 4 | rule cutadapt_trim: 5 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 6 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 7 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_R1_val_1.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R1_dedup_val_1.fq.gz", 8 | o2 = "TRIMMED_FASTQ/{combo}/{file}_R2_val_2.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R2_dedup_val_2.fq.gz" 9 | log: "LOGS/{combo}/{file}_trim.log" 10 | conda: ""+TRIMENV+".yaml" 11 | threads: min(int(MAXTHREAD/8),4) if min(int(MAXTHREAD/8),4) >= 1 else (4 if int(MAXTHREAD) >= 4 else 1) 12 | params: odir=lambda wildcards, output:os.path.dirname(output.o1), 13 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV)['OPTIONS'].get('TRIM', ""), 14 | trim=TRIMBIN 15 | shell: "{params.trim} {params.tpara} --cores {threads} -o {output.o1} -p {output.o2} {input.r1} {input.r2} &> {log}" 16 | 17 | rule cutadapt_rename: 18 | input: o1 = rules.cutadapt_trim.output.o1, 19 | o2 = rules.cutadapt_trim.output.o2 20 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 21 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 22 | conda: ""+TRIMENV+".yaml" 23 | threads: 1 24 | shell: "mv {input.o1} {output.r1} && mv {input.o2} {output.r2}" 25 | 26 | else: 27 | rule cutadapt_trim: 28 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 29 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_dedup_trimmed.fq.gz" 30 | log: "LOGS/{combo}/{file}_trim.log" 31 | conda: ""+TRIMENV+".yaml" 32 | threads: min(int(MAXTHREAD/8),4) if min(int(MAXTHREAD/2),4) >= 1 else (4 if int(MAXTHREAD) >= 4 else 1) 33 | params: odir=lambda wildcards, output: os.path.dirname(output.o1), 34 | tpara = lambda wildcards:tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV)['OPTIONS'].get('TRIM', ""), 35 | trim=TRIMBIN, 36 | shell: "{params.trim} {params.tpara} --cores {threads} -o {output.o1} {input.r1} > {log}" 37 | 38 | rule cutadapt_rename: 39 | input: o1 = rules.cutadapt_trim.output.o1 40 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 41 | conda: ""+TRIMENV+".yaml" 42 | threads: 1 43 | shell: "mv {input.o1} {output.r1}" 44 | -------------------------------------------------------------------------------- /workflows/minimap.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: fa = REFERENCE 10 | output: idx = INDEX, 11 
| uidx = expand("{refd}/INDICES/{mape}_{unikey}.idx", refd=REFDIR, mape=MAPPERENV, unikey=unik) 12 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 13 | conda: ""+MAPPERENV+".yaml" 14 | threads: MAXTHREAD 15 | params: indexer=MAPPERBIN, 16 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 17 | linkidx = lambda wildcards, output: str(os.path.abspath(output.uidx[0])) 18 | shell: "{params.indexer} -t {threads} -d {output.uidx} {params.ipara} {input.fa} 2> {log} && ln -s {params.linkidx} {output.idx}" 19 | 20 | if paired == 'paired': 21 | rule mapping: 22 | input: q1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 23 | q2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 24 | uidx = rules.generate_index.output.uidx[0] 25 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 26 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 27 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 28 | log: "LOGS/{combo}/{file}/mapping.log" 29 | conda: ""+MAPPERENV+".yaml" 30 | threads: MAXTHREAD 31 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 32 | mapp = MAPPERBIN 33 | shell: "{params.mapp} -t {threads} {params.mpara} {input.uidx} {input.q1} {input.q2} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} - ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 34 | 35 | else: 36 | rule mapping: 37 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 38 | uidx = rules.generate_index.output.uidx[0] 39 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 40 | unmapped = "UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 41 | log: "LOGS/{combo}/{file}/mapping.log" 42 | conda: ""+MAPPERENV+".yaml" 43 | threads: MAXTHREAD 44 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 45 | mapp = MAPPERBIN 46 | shell: "{params.mapp} -t {threads} {params.mpara} {input.uidx} {input.query} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" 47 | -------------------------------------------------------------------------------- /docs/source/first.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | First Steps 3 | ============ 4 | 5 | **MONSDA** acts a wrapper around **Snakemake** or **Nextflow** based on a user defined **config.json** file. This **config.json** holds all the information that is needed to run the jobs and will be parsed by **MONSDA** and split into independent sub-configs that can later be found in the directory **SubSnakes** or **SubFlows** respectively. Command line execution calls are stored in the directory *JOBS*, so users can manipulate those or rerun them manually as needed. By default, however, **MONSDA** will run those jobs automatically either locally, or through **Snakemake's** or **Nextflow's** integrated cluster interfaces. 
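
As a rough orientation, after a run driven by a single **config.json** the working directory will contain, next to the results, the generated sub-configs and stored calls mentioned above; the layout below is an illustrative sketch, the exact file names depend on the condition tree and the chosen workflow manager:

::

    config.json   # the single, user-maintained configuration
    SubSnakes/    # auto-generated sub-configs per condition (Snakemake mode)
    SubFlows/     # auto-generated sub-configs per condition (Nextflow mode)
    JOBS/         # stored command line calls, can be edited and rerun manually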
6 | 7 | To successfully run an analysis pipeline, a few steps have to be followed: 8 | * Install MONSDA either via **bioconda** or **pip** following the instructions in :ref:`install` 9 | * Directory structure: The structure for the directories is dictated by :ref:`condition-tree` in the config file 10 | * Config file: This is the central part of a **MONSDA** run. Depending on :ref:`config-file` **MONSDA** will determine processing steps and generate corresponding config and workflow files to run each subworkflow until all processing steps are done. 11 | 12 | 13 | In general it is necessary to write a configuration file containing 14 | information on paths, files to process and settings beyond default for 15 | mapping tools and others. The template on which the analysis is based can 16 | be found in the **config** directory and will be explained in detail later. 17 | 18 | To create a working environment for this repository please install the 19 | **MONSDA.yaml** environment (if not installed via **bioconda**) as found in the **envs** directory 20 | like so: 21 | 22 | :: 23 | 24 | conda env create -n monsda -f envs/MONSDA.yaml 25 | 26 | The **envs** directory holds all the environments needed to run the pipelines in the **workflows** directory; 27 | these will be installed automatically when needed. 28 | 29 | For fast resolution of conda packages, we recommend conda-libmamba-solver_, a new solver for the conda package manager that speeds up conda without the need to install mamba and is shipped with **MONSDA**. However, the user is free to use mamba_, which is currently also the standard conda frontend for Snakemake_. 30 | 31 | .. _mamba: https://mamba.readthedocs.io/en/latest/ 32 | .. _conda-libmamba-solver: https://github.com/conda-incubator/conda-libmamba-solver 33 | 34 | For distribution of jobs one can either rely on local hardware, use 35 | scheduling software like 36 | Slurm_ or the SGE_, 37 | or follow any other integration in 38 | Snakemake_ or Nextflow_, 39 | but be aware that most of these have not been tested for this 40 | repository and usually require additional system-dependent setup and 41 | configuration. 42 | 43 | .. _Slurm: https://slurm.schedmd.com/documentation.html 44 | .. _SGE: https://docs.oracle.com/cd/E19957-01/820-0699/chp1-1/index.html 45 | .. _Snakemake: https://Snakemake.readthedocs.io/en/stable/executing/cluster-cloud.html 46 | .. _Nextflow: https://www.Nextflow.io/docs/latest/awscloud.html#aws-batch 47 | 48 | This manual will only show examples for local and SLURM usage.
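
As a minimal sketch of the SLURM case, a jobscript wrapping a **MONSDA** run could look like the following; the resource values are arbitrary assumptions, and the actual **MONSDA** invocation and cluster integration are described in the execution and cluster sections:

::

    #!/bin/bash
    #SBATCH --job-name=monsda
    #SBATCH --cpus-per-task=8
    #SBATCH --mem=16G
    # activate the environment created above
    conda activate monsda
    # placeholder: add the MONSDA call as described in the execution section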
49 | -------------------------------------------------------------------------------- /workflows/segemehl.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: fa = REFERENCE 10 | output: idx = INDEX, 11 | uidx = expand("{refd}/INDICES/{mape}_{unikey}.idx", refd=REFDIR, mape=MAPPERENV, unikey=unik) 12 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 13 | conda: ""+MAPPERENV+".yaml" 14 | threads: MAXTHREAD 15 | params: indexer = MAPPERBIN, 16 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 17 | linkidx = lambda wildcards, output: str(os.path.abspath(output.uidx[0])) 18 | shell: "{params.indexer} --threads {threads} {params.ipara} -d {input.fa} -x {output.uidx} &> {log} && ln -fs {params.linkidx} {output.idx}" 19 | 20 | if paired == 'paired': 21 | rule mapping: 22 | input: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 23 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 24 | uidx = rules.generate_index.output.uidx[0], 25 | fa = REFERENCE 26 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 27 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 28 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 29 | log: "LOGS/{combo}/{file}/mapping.log" 30 | conda: ""+MAPPERENV+".yaml" 31 | threads: MAXTHREAD 32 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 33 | mapp=MAPPERBIN 34 | shell: "{params.mapp} {params.mpara} -d {input.fa} -i {input.uidx} -q {input.r1} -p {input.r2} --threads {threads} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} - ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 35 | 36 | else: 37 | rule mapping: 38 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 39 | uidx = rules.generate_index.output.uidx[0], 40 | fa = REFERENCE 41 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 42 | unmapped = "UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 43 | log: "LOGS/{combo}/{file}/mapping.log" 44 | conda: ""+MAPPERENV+".yaml" 45 | threads: MAXTHREAD 46 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 47 | mapp=MAPPERBIN 48 | shell: "{params.mapp} {params.mpara} -d {input.fa} -i {input.uidx} -q {input.query} --threads {threads} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" 49 | -------------------------------------------------------------------------------- /docs/source/conditiontree.rst: -------------------------------------------------------------------------------- 1 | .. 
_condition-tree: 2 | 3 | The Condition-Tree 4 | ================== 5 | 6 | A key concept behind **MONSDA** is that config files are split according to conditions defined in the **config** file and each subworkflow is then run consecutively. This separates workflows into independent subworkflows, each potentially running on different input, with differing options and executables, without danger of interfering with each other. 7 | 8 | As described in :ref:`preparation`, you should have an idea of what to analyze and how to split up your conditions if needed. For each ID you work on, you can define no, one or multiple conditions and settings that will be used for the analysis. The condition-tree also defines the directory structure to follow for Input and Output directories. We assume a general tree to look something like 9 | 10 | .. code-block:: json 11 | 12 | 'ID' -> 'CONDITION' -> 'SETTING' 13 | 14 | Here *ID* is the first level and the optional *Condition* the second. *Setting* is used by **MONSDA** to enable processing of the same samples under different settings or command-line options for e.g. mapping tools, trimming tools and later also postprocessing tools. **MONSDA** will also build an output directory based on the combination of tools used, e.g. fastqc-cutadapt-star-umitools, to indicate which combination of tools was used to generate the output and to prevent results from being mixed or overwritten. 15 | 16 | As an example, say I want to analyze samples retrieved from LabA on 01012020 (yes, that happens) with the mapping tools star and hisat; my condition-tree would look like **LabA:01012020** and my FASTQ input directory would follow it as **FASTQ/LabA/01012020**. The '01012020' directory would thereby contain all the fastq.gz files I need for analysis as stated in the corresponding config file. As we assume that settings may change but the input files will stay the same, **MONSDA** will search one level above the deepest level of the condition-tree. This means, if you have a tree like: 17 | 18 | .. code-block:: json 19 | 20 | 'ID1' -> 'CONDITION1' -> 'SETTING1', 'SETTING2', 'SETTING3' 21 | 22 | You do not need to copy input from **FASTQ/LabA/01012020** to **FASTQ/LabA/01012020/SETTING1/2/3**; instead, **MONSDA** will find the input in **FASTQ/LabA/01012020** and generate output directories which contain the *Setting* level. This of course also works if you want to analyze samples from different dates and the same lab with the same settings, from different labs, and so on. 23 | 24 | **MONSDA** will automatically define a unique *tool-key* based on the currently enabled workflow steps and the combination of tools defined in the config file. From that information it will generate output folders like **MAPPING/LabA/01012020/star** and **MAPPING/LabA/01012020/hisat** if no other tools and workflow steps were configured to be used. 25 | 26 | Optionally, a user can also run one or the other tool with different settings, for example to benchmark tools. Define for example **map_stringent** and **map_relaxed** and indicate this on the *MAPPING* level in the config file. FASTQ input will still be found in **FASTQ/LabA/01012020**, while output files will appear in **MAPPING/LabA/01012020/map_stringent/star** and **MAPPING/LabA/01012020/map_stringent/hisat** or **MAPPING/LabA/01012020/map_relaxed/star** and **MAPPING/LabA/01012020/map_relaxed/hisat**, respectively.
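
Written out as (pseudo) JSON, the condition-tree for this example nests the two settings below the ID and condition level; the per-setting objects are left empty here and would hold the stringent or relaxed options for star and hisat, while the surrounding keys of the real config file are omitted and described in the config chapter:

.. code-block:: json

    {
        "LabA": {
            "01012020": {
                "map_stringent": {},
                "map_relaxed": {}
            }
        }
    }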
27 | -------------------------------------------------------------------------------- /scripts/Analysis/AddStructure.py: -------------------------------------------------------------------------------- 1 | #/usr/bin/env python3 2 | # AddStructure.py --- 3 | # 4 | # Filename: AddStructure.py 5 | # Description: 6 | # Author: Joerg Fallmann 7 | # Maintainer: 8 | # Created: Tue Sep 10 18:00:42 2019 (+0200) 9 | # Version: 10 | # Package-Requires: () 11 | # Last-Updated: Wed Sep 11 09:10:01 2019 (+0200) 12 | # By: Joerg Fallmann 13 | # Update #: 33 14 | # URL: 15 | # Doc URL: 16 | # Keywords: 17 | # Compatibility: 18 | # 19 | # 20 | 21 | # Commentary: 22 | # 23 | # 24 | # 25 | # 26 | 27 | # Change Log: 28 | # 29 | # 30 | # 31 | # 32 | # This program is free software: you can redistribute it and/or modify 33 | # it under the terms of the GNU General Public License as published by 34 | # the Free Software Foundation, either version 3 of the License, or (at 35 | # your option) any later version. 36 | # 37 | # This program is distributed in the hope that it will be useful, but 38 | # WITHOUT ANY WARRANTY; without even the implied warranty of 39 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 40 | # General Public License for more details. 41 | # 42 | # You should have received a copy of the GNU General Public License 43 | # along with GNU Emacs. If not, see . 44 | # 45 | # 46 | 47 | # Code: 48 | ###Imports 49 | import sys,os 50 | import argparse 51 | import traceback as tb 52 | import gzip 53 | import RNA 54 | 55 | ###Arguments 56 | def parseargs(): 57 | parser = argparse.ArgumentParser(description='Add structure to (bed) file containing sequence') 58 | parser.add_argument("-f", "--field", type=int, default=0, help='Which field contains the sequence, default is last field (0)') 59 | parser.add_argument("-b", "--bed", type=str, help='Bed or other tab separated file containing the sequence') 60 | 61 | return parser.parse_args() 62 | 63 | ###CODE 64 | 65 | def addseq(field, bed): 66 | try: 67 | entries = parse_bed(bed) 68 | for line in entries: 69 | sequence = line.rstrip().split('\t')[field-1].upper() 70 | # create model details 71 | md = RNA.md() 72 | # create new fold_compound object 73 | fc = RNA.fold_compound(sequence, md) 74 | # compute minimum free energy (mfe) and corresponding structure 75 | (ss, mfe) = fc.mfe() 76 | 77 | sys.stdout.write('\t'.join([line.strip(),ss])+'\n') 78 | 79 | except Exception as err: 80 | exc_type, exc_value, exc_tb = sys.exc_info() 81 | tbe = tb.TracebackException( 82 | exc_type, exc_value, exc_tb, 83 | ) 84 | with open('error','a') as h: 85 | print(''.join(tbe.format()), file=h) 86 | 87 | def parse_bed(bed, annotated=None): 88 | try: 89 | if os.path.isfile(os.path.abspath(bed)): 90 | if '.gz' in bed: 91 | return gzip.open(bed,'rt') 92 | else: 93 | return open(bed,'rt') 94 | 95 | except Exception as err: 96 | exc_type, exc_value, exc_tb = sys.exc_info() 97 | tbe = tb.TracebackException( 98 | exc_type, exc_value, exc_tb, 99 | ) 100 | with open('error','a') as h: 101 | print(''.join(tbe.format()), file=h) 102 | 103 | 104 | ###MAIN 105 | if __name__ == '__main__': 106 | args=parseargs() 107 | addseq(args.field, args.bed) 108 | 109 | # 110 | # AddStructure.py ends here 111 | -------------------------------------------------------------------------------- /workflows/bwameth.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = 
sub_dict(tool_params(SAMPLES[0], None, config, "MAPPING", MAPPERENV)["OPTIONS"],["INDEX"],) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: ref = REFERENCE 10 | output: idx = directory(INDEX), 11 | uidx = expand("{refd}/INDICES/{mape}_{unikey}/{pref}", refd=REFDIR, mape=MAPPERENV, unikey=unik, pref=PREFIX+os.path.basename(REFERENCE).replace(".gz", "")) 12 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 13 | conda: ""+MAPPERENV+".yaml" 14 | threads: 1 15 | params: indexer = 'bwameth.py index-mem2', 16 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 17 | linkidx = lambda wildcards, output: str(os.path.abspath(str.join(os.sep, str(output.uidx[0]).split(os.sep)[:-1]))) if PREFIX != '' else str(os.path.abspath(str(output.uidx[0]))), 18 | shell: "if [[ -f \"{output.uidx}\" ]]; then ln -fs {params.linkidx} {output.idx} && touch {output.uidx} && echo \"Found bwa index, continue with mapping\";else zcat {input.ref} > {output.uidx} && {params.indexer} {output.uidx} {params.ipara} 2> {log} && ln -fs {params.linkidx} {output.idx};fi" 19 | 20 | if paired == 'paired': 21 | rule mapping: 22 | input: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 23 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 24 | uidx = rules.generate_index.output.uidx[0] 25 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 26 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 27 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 28 | log: "LOGS/{combo}/{file}/bwameth.log" 29 | conda: ""+MAPPERENV+".yaml" 30 | threads: MAXTHREAD 31 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get("MAP", ""), 32 | mapp = MAPPERBIN 33 | shell: "{params.mapp} {params.mpara} --threads {threads} --reference {input.uidx} {input.r1} {input.r2} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 34 | else: 35 | rule mapping: 36 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 37 | uidx = rules.generate_index.output.uidx[0], 38 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 39 | unmapped = "UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 40 | log: "LOGS/{combo}/{file}/mapping.log" 41 | conda: ""+MAPPERENV+".yaml" 42 | threads: MAXTHREAD 43 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 44 | mapp = MAPPERBIN 45 | shell: "{params.mapp} {params.mpara} --threads {threads} --reference {input.uidx} {input.query} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" -------------------------------------------------------------------------------- /workflows/fastqc_dedup_trim.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | //QC RAW 6 | process qc_raw{ 7 | conda "$QCENV"+".yaml" 8 | cpus THREADS 
9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 15 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 16 | else null 17 | } 18 | 19 | input: 20 | path read 21 | 22 | output: 23 | path "*.{zip,html}", emit: fastqc_results 24 | 25 | script: 26 | """ 27 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 28 | """ 29 | } 30 | 31 | workflow QC_RAW{ 32 | take: collection 33 | 34 | main: 35 | //SAMPLE CHANNELS 36 | if (PAIRED == 'paired'){ 37 | SAMPLES = SAMPLES.collect{ 38 | element -> return "${workflow.workDir}/../FASTQ/"+element+"_{R2,R1}.*fastq.gz" 39 | } 40 | }else{ 41 | SAMPLES=SAMPLES.collect{ 42 | element -> return "${workflow.workDir}/../FASTQ/"+element+".*fastq.gz" 43 | } 44 | } 45 | 46 | samples_ch = Channel.fromPath(SAMPLES.sort()) 47 | 48 | qc_raw(samples_ch.collect()) 49 | 50 | emit: 51 | qc = qc_raw.out.fastqc_results 52 | } 53 | 54 | 55 | //QC TRIM 56 | 57 | process qc_trimmed{ 58 | conda "$QCENV"+".yaml" 59 | cpus THREADS 60 | cache 'lenient' 61 | //validExitStatus 0,1 62 | 63 | publishDir "${workflow.workDir}/../" , mode: 'link', 64 | saveAs: {filename -> 65 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 66 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 67 | else null 68 | } 69 | 70 | input: 71 | //val collect 72 | path read 73 | 74 | output: 75 | path "*.{zip,html}", emit: fastqc_results 76 | 77 | script: 78 | """ 79 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 80 | """ 81 | } 82 | 83 | workflow QC_TRIMMING{ 84 | take: collection 85 | 86 | main: 87 | 88 | qc_trimmed(collection.collect()) 89 | 90 | emit: 91 | qc = qc_trimmed.out.fastqc_results 92 | } 93 | 94 | // DEDUP QC 95 | 96 | process qc_dedup{ 97 | conda "$QCENV"+".yaml" 98 | cpus THREADS 99 | cache 'lenient' 100 | //validExitStatus 0,1 101 | 102 | publishDir "${workflow.workDir}/../" , mode: 'link', 103 | saveAs: {filename -> 104 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 105 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 106 | else null 107 | } 108 | 109 | input: 110 | path read 111 | 112 | output: 113 | path "*.{zip,html}", emit: fastqc_results 114 | 115 | script: 116 | """ 117 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 118 | """ 119 | } 120 | 121 | workflow QC_DEDUP{ 122 | take: collection 123 | 124 | main: 125 | 126 | qc_dedup(collection.collect()) 127 | 128 | emit: 129 | qc = qc_dedup.out.fastqc_results 130 | } 131 | -------------------------------------------------------------------------------- /workflows/trimgalore.smk: -------------------------------------------------------------------------------- 1 | TRIMBIN, TRIMENV = env_bin_from_config(config,'TRIMMING') 2 | #outdir = 'TRIMMED_FASTQ' 3 | 4 | #wildcard_constraints: 5 | # file = '|'.join(list(samplecond(SAMPLES, config))), 6 | # read = "R1|R2" 7 | # outdir = outdir 8 | 9 | #rule trimthemall: 10 | # input: expand("{outdir}{file}_{read}_trimmed.fastq.gz", outdir=outdir, file=samplecond(SAMPLES, config), read=["R1","R2"]) if paired == \'paired\' else expand("{outdir}{file}_trimmed.fastq.gz", outdir=outdir, 
file=samplecond(SAMPLES, config)) 11 | 12 | if paired == 'paired': 13 | rule trimgalore_trim: 14 | input: r1 = lambda wildcards: "FASTQ/{rawfile}_R1.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R1_dedup.fastq.gz", 15 | r2 = lambda wildcards: "FASTQ/{rawfile}_R2.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_R2_dedup.fastq.gz" 16 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_R1_val_1.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R1_dedup_val_1.fq.gz", 17 | o2 = "TRIMMED_FASTQ/{combo}/{file}_R2_val_2.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_R2_dedup_val_2.fq.gz" 18 | log: "LOGS/{combo}/{file}_trim.log" 19 | conda: ""+TRIMENV+".yaml" 20 | threads: min(int(MAXTHREAD/2),4) if min(int(MAXTHREAD/2),4) >= 1 else (4 if int(MAXTHREAD) >= 4 else 1) 21 | params: odir=lambda wildcards, output:os.path.dirname(output.o1), 22 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV)['OPTIONS'].get('TRIM', ""), 23 | trim=TRIMBIN 24 | shell: "{params.trim} --cores {threads} --paired --gzip {params.tpara} -o {params.odir} {input.r1} {input.r2} &> {log}" 25 | 26 | rule trimgalore_rename: 27 | input: o1 = rules.trimgalore_trim.output.o1, 28 | o2 = rules.trimgalore_trim.output.o2 29 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 30 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz" 31 | conda: ""+TRIMENV+".yaml" 32 | threads: 1 33 | shell: "mv {input.o1} {output.r1} && mv {input.o2} {output.r2}" 34 | 35 | else: 36 | rule trimgalore_trim: 37 | input: r1 = lambda wildcards: "FASTQ/{rawfile}.fastq.gz".format(rawfile=[x for x in SAMPLES if x.split(os.sep)[-1] in wildcards.file][0]) if not prededup else "DEDUP_FASTQ/{combo}/{file}_dedup.fastq.gz" 38 | output: o1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fq.gz" if not prededup else "TRIMMED_FASTQ/{combo}/{file}_dedup_trimmed.fq.gz" 39 | log: "LOGS/{combo}/{file}_trim.log" 40 | conda: ""+TRIMENV+".yaml" 41 | threads: min(int(MAXTHREAD/2),4) if min(int(MAXTHREAD/2),4) >= 1 else (4 if int(MAXTHREAD) >= 4 else 1) 42 | params: odir = lambda wildcards, output: os.path.dirname(output.o1), 43 | tpara = lambda wildcards: tool_params(wildcards.file, None, config, "TRIMMING", TRIMENV)['OPTIONS'].get('TRIM', ""), 44 | trim=TRIMBIN 45 | shell: "{params.trim} --cores {threads} --gzip {params.tpara} -o {params.odir} {input.r1} &> {log}" 46 | 47 | rule trimgalore_rename: 48 | input: o1 = rules.trimgalore_trim.output.o1 49 | output: r1 = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz" 50 | conda: ""+TRIMENV+".yaml" 51 | threads: 1 52 | shell: "mv {input.o1} {output.r1}" 53 | -------------------------------------------------------------------------------- /envs/drimseq_DTU.yaml: -------------------------------------------------------------------------------- 1 | name: drimseq_DTU 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - bioconductor-annotationdbi =1.52.0 8 | - bioconductor-biobase =2.50.0 9 | - bioconductor-biocfilecache =1.14.0 10 | - bioconductor-biocgenerics =0.36.0 11 | - bioconductor-biocparallel =1.24.0 12 | - bioconductor-biomart =2.46.0 13 | - bioconductor-biostrings =2.58.0 14 | - bioconductor-delayedarray =0.16.0 15 | - bioconductor-drimseq =1.18.0 16 | - bioconductor-edger =3.32.0 17 | - bioconductor-genomeinfodb =1.26.0 18 | - bioconductor-genomeinfodbdata 
=1.2.4 19 | - bioconductor-genomicalignments =1.26.0 20 | - bioconductor-genomicfeatures =1.42.0 21 | - bioconductor-genomicranges =1.42.0 22 | - bioconductor-iranges =2.24.0 23 | - bioconductor-limma =3.46.0 24 | - bioconductor-matrixgenerics =1.2.0 25 | - bioconductor-rhtslib =1.22.0 26 | - bioconductor-rsamtools =2.6.0 27 | - bioconductor-rtracklayer =1.50.0 28 | - bioconductor-s4vectors =0.28.0 29 | - bioconductor-summarizedexperiment =1.20.0 30 | - bioconductor-tximport =1.18.0 31 | - bioconductor-xvector =0.30.0 32 | - bioconductor-zlibbioc =1.36.0 33 | - r-askpass =1.1 34 | - r-assertthat =0.2.1 35 | - r-backports =1.2.1 36 | - r-base =4.0.3 37 | - r-bh =1.75.0_0 38 | - r-bit =4.0.4 39 | - r-bit64 =4.0.5 40 | - r-bitops =1.0_6 41 | - r-blob =1.2.1 42 | - r-brio =1.1.1 43 | - r-cachem =1.0.4 44 | - r-callr =3.5.1 45 | - r-cli =2.3.0 46 | - r-colorspace =2.0_0 47 | - r-crayon =1.4.1 48 | - r-curl =4.3 49 | - r-dbi =1.1.1 50 | - r-dbplyr =2.1.0 51 | - r-desc =1.2.0 52 | - r-diffobj =0.3.3 53 | - r-digest =0.6.27 54 | - r-dplyr =1.0.4 55 | - r-ellipsis =0.3.1 56 | - r-evaluate =0.14 57 | - r-fansi =0.4.2 58 | - r-farver =2.0.3 59 | - r-fastmap =1.1.0 60 | - r-formatr =1.7 61 | - r-futile.logger =1.4.3 62 | - r-futile.options =1.0.1 63 | - r-generics =0.1.0 64 | - r-ggplot2 =3.3.3 65 | - r-glue =1.4.2 66 | - r-gtable =0.3.0 67 | - r-hms =1.0.0 68 | - r-httr =1.4.2 69 | - r-isoband =0.2.3 70 | - r-jsonlite =1.7.2 71 | - r-labeling =0.4.2 72 | - r-lambda.r =1.2.4 73 | - r-lattice =0.20_41 74 | - r-lifecycle =1.0.0 75 | - r-locfit =1.5_9.4 76 | - r-magrittr =2.0.1 77 | - r-mass =7.3_53.1 78 | - r-matrix =1.3_2 79 | - r-matrixstats =0.58.0 80 | - r-memoise =2.0.0 81 | - r-mgcv =1.8_34 82 | - r-mime =0.10 83 | - r-munsell =0.5.0 84 | - r-nlme =3.1_152 85 | - r-openssl =1.4.3 86 | - r-pillar =1.4.7 87 | - r-pkgbuild =1.2.0 88 | - r-pkgconfig =2.0.3 89 | - r-pkgload =1.1.0 90 | - r-plogr =0.2.0 91 | - r-plyr =1.8.6 92 | - r-praise =1.0.0 93 | - r-prettyunits =1.1.1 94 | - r-processx =3.4.5 95 | - r-progress =1.2.2 96 | - r-ps =1.5.0 97 | - r-purrr =0.3.4 98 | - r-r6 =2.5.0 99 | - r-rappdirs =0.3.3 100 | - r-rcolorbrewer =1.1_2 101 | - r-rcpp =1.0.6 102 | - r-rcurl =1.98_1.2 103 | - r-rematch2 =2.1.2 104 | - r-reshape2 =1.4.4 105 | - r-rlang =0.4.10 106 | - r-rprojroot =2.0.2 107 | - r-rsqlite =2.2.3 108 | - r-rstudioapi =0.13 109 | - r-scales =1.1.1 110 | - r-snow =0.4_3 111 | - r-stringi =1.5.3 112 | - r-stringr =1.4.0 113 | - r-sys =3.4 114 | - r-testthat =3.0.2 115 | - r-tibble =3.0.6 116 | - r-tidyselect =1.1.0 117 | - r-utf8 =1.1.4 118 | - r-vctrs =0.3.6 119 | - r-viridislite =0.3.0 120 | - r-waldo =0.2.4 121 | - r-withr =2.4.1 122 | - r-xml =3.99_0.5 123 | - r-xml2 =1.3.2 124 | - r-zeallot =0.1.0 125 | - readline =8.2 126 | - sed =4.8 -------------------------------------------------------------------------------- /workflows/bwa2.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, "MAPPING", MAPPERENV)["OPTIONS"],["INDEX"],) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: ref = REFERENCE 10 | output: idx = directory(INDEX), 11 | uidx = expand("{refd}/INDICES/{mape}_{unikey}/{pref}", refd=REFDIR, mape=MAPPERENV, unikey=unik, pref=PREFIX) 12 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 13 | 
conda: ""+MAPPERENV+".yaml" 14 | threads: 1 15 | params: indexer = MAPPERBIN.split(' ')[0], 16 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 17 | linkidx = lambda wildcards, output: str(os.path.abspath(str.join(os.sep, str(output.uidx[0]).split(os.sep)[:-1]))) if PREFIX != '' else str(os.path.abspath(str(output.uidx[0]))), 18 | tolink = lambda wildcards, output: str(os.path.abspath(str.join(os.sep, str(output.idx).split(os.sep)[:-1]))) 19 | shell: "if [[ -f \"{output.uidx}\/*\" ]]; then ln -fs {params.linkidx} {output.idx} && touch {output.uidx} && echo \"Found bwa index, continue with mapping\" ; else {params.indexer} index -p {output.uidx} {params.ipara} {input.ref} 2> {log} && ln -fs {params.linkidx} {output.idx} && touch {output.uidx};fi" 20 | 21 | if paired == 'paired': 22 | rule mapping: 23 | input: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 24 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 25 | index = rules.generate_index.output.uidx, 26 | ref = REFERENCE 27 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 28 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 29 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 30 | log: "LOGS/{combo}/{file}/mapping.log" 31 | conda: ""+MAPPERENV+".yaml" 32 | threads: MAXTHREAD 33 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get("MAP", ""), 34 | mapp = MAPPERBIN 35 | #idx = lambda wildcards, input: str.join(os.sep,[str(input.index), PREFIX]) if PREFIX != '' else input.index 36 | shell: "{params.mapp} mem {params.mpara} -t {threads} {input.index} {input.r1} {input.r2} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 37 | 38 | else: 39 | rule mapping: 40 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 41 | uidx = rules.generate_index.output.uidx[0], 42 | ref = REFERENCE 43 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 44 | unmapped = "UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 45 | log: "LOGS/{combo}/{file}/mapping.log" 46 | conda: ""+MAPPERENV+".yaml" 47 | threads: MAXTHREAD 48 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 49 | mapp = MAPPERBIN 50 | #idx = lambda wildcards, input: str.join(os.sep,[str(input.index), PREFIX]) if PREFIX != '' else input.index 51 | shell: "{params.mapp} mem {params.mpara} -t {threads} {input.uidx} {input.query} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" -------------------------------------------------------------------------------- /workflows/salmon.smk: -------------------------------------------------------------------------------- 1 | COUNTBIN, COUNTENV = env_bin_from_config(config,'COUNTING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'COUNTING', COUNTENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = COUNTENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule themall: 9 | input: expand("COUNTS/{combo}/{file}_counts.sf.gz", 
combo=combo, file=samplecond(SAMPLES, config)) 10 | 11 | rule salmon_index: 12 | input: fa = REFERENCE 13 | output: idx = directory(INDEX), 14 | uidx = directory(expand("{refd}/INDICES/{mape}_{unikey}", refd=REFDIR, mape=COUNTENV, unikey=unik)) 15 | log: expand("LOGS/{sets}/{cape}.idx.log", sets=SETS, cape=COUNTENV) 16 | conda: ""+COUNTENV+".yaml" 17 | threads: MAXTHREAD 18 | params: mapp = COUNTBIN, 19 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('INDEX', ""), 20 | decoy = f"-d {os.path.abspath(DECOY)}" if DECOY else '', 21 | linkidx = lambda wildcards, output: str(os.path.abspath(output.uidx[0])) 22 | shell: "set +euo pipefail; {params.mapp} index {params.ipara} {params.decoy} -p {threads} -t {input.fa} -i {output.uidx} &>> {log} && ln -fs {params.linkidx} {output.idx}" 23 | 24 | 25 | if paired == 'paired': 26 | rule mapping: 27 | input: r1 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_R1_trimmed.fastq.gz", scombo=scombo), 28 | r2 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_R2_trimmed.fastq.gz", scombo=scombo), 29 | uidx = rules.salmon_index.output.uidx[0] 30 | output: cnts = report("COUNTS/{combo}/{file}_counts.sf.gz", category="COUNTING"), 31 | ctsdir = report(directory("COUNTS/{combo}/{file}"), category="COUNTING") 32 | log: "LOGS/{combo}/{file}/salmonquant.log" 33 | conda: ""+COUNTENV+".yaml" 34 | threads: MAXTHREAD 35 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('COUNT', ""), 36 | mapp=COUNTBIN, 37 | stranded = lambda x: '-l ISF' if (stranded == 'fr' or stranded == 'ISF') else '-l ISR' if (stranded == 'rf' or stranded == 'ISR') else '-l IU', 38 | linksf = lambda wildcards, output: str(os.path.abspath(output.ctsdir)) 39 | shell: "set +euo pipefail; {params.mapp} quant -p {threads} -i {input.uidx} {params.stranded} {params.cpara} -o {output.ctsdir} -1 {input.r1} -2 {input.r2} &>> {log} && gzip {output.ctsdir}/quant.sf && ln -fs {params.linksf}/quant.sf.gz {output.cnts} &>> {log}" 40 | 41 | else: 42 | rule mapping: 43 | input: r1 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_trimmed.fastq.gz", scombo=scombo), 44 | uidx = rules.salmon_index.output.uidx[0] 45 | output: cnts = report("COUNTS/{combo}/{file}_counts.sf.gz", category="COUNTING"), 46 | ctsdir = report(directory("COUNTS/{combo}/{file}"), category="COUNTING") 47 | log: "LOGS/{combo}/{file}/salmonquant.log" 48 | conda: ""+COUNTENV+".yaml" 49 | threads: MAXTHREAD 50 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('COUNT', ""), 51 | mapp=COUNTBIN, 52 | stranded = lambda x: '-l SF' if (stranded == 'fr' or stranded == 'SF') else '-l SR' if (stranded == 'rf' or stranded == 'SR') else '-l U', 53 | linksf = lambda wildcards, output: str(os.path.abspath(output.ctsdir)) 54 | shell: "set +euo pipefail; {params.mapp} quant -p {threads} -i {input.uidx} {params.stranded} {params.cpara} -o {output.ctsdir} -r {input.r1} &>> {log} && gzip {output.ctsdir}/quant.sf ; ln -fs {params.linksf}/quant.sf.gz {output.cnts} &>> {log}" 55 | -------------------------------------------------------------------------------- /workflows/kallisto.smk: -------------------------------------------------------------------------------- 1 | COUNTBIN, COUNTENV = env_bin_from_config(config,'COUNTING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'COUNTING', COUNTENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] 
= COUNTENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule themall: 9 | input: expand("COUNTS/{combo}/{file}_counts.gz", combo=combo, file=samplecond(SAMPLES, config)) 10 | 11 | rule kallisto_index: 12 | input: fa = REFERENCE 13 | output: idx = INDEX, 14 | uidx = expand("{refd}/INDICES/{mape}_{unikey}.idx", refd=REFDIR, mape=COUNTENV, unikey=unik) 15 | log: expand("LOGS/{sets}/{cape}.idx.log", sets=SETS, cape=COUNTENV) 16 | conda: ""+COUNTENV+".yaml" 17 | threads: MAXTHREAD 18 | params: mapp = COUNTBIN, 19 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('INDEX', ""), 20 | decoy = f"-d {os.path.abspath(DECOY)}" if DECOY else '', 21 | linkidx = lambda wildcards, output: str(os.path.abspath(output.uidx[0])) 22 | shell: "set +euo pipefail; {params.mapp} index {params.ipara} {params.decoy} -t {threads} -i {output.uidx} {input.fa} &>> {log} && ln -fs {params.linkidx} {output.idx}" 23 | 24 | 25 | if paired == 'paired': 26 | rule mapping: 27 | input: r1 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_R1_trimmed.fastq.gz", scombo=scombo), 28 | r2 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_R2_trimmed.fastq.gz", scombo=scombo), 29 | uidx = rules.kallisto_index.output.uidx[0] 30 | output: cnts = report("COUNTS/{combo}/{file}_counts.gz", category="COUNTING"), 31 | ctsdir = report(directory("COUNTS/{combo}/{file}"), category="COUNTING") 32 | log: "LOGS/{combo}/{file}/kallistoquant.log" 33 | conda: ""+COUNTENV+".yaml" 34 | threads: MAXTHREAD 35 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('COUNT', ""), 36 | mapp=COUNTBIN, 37 | stranded = lambda x: '--fr-stranded' if (stranded == 'fr' or stranded == 'ISF') else '--rf-stranded' if (stranded == 'rf' or stranded == 'ISR') else '', 38 | linksf = lambda wildcards, output: str(os.path.abspath(output.ctsdir)) 39 | shell: "set +euo pipefail; {params.mapp} quant -t {threads} -i {input.uidx} {params.stranded} {params.cpara} -o {output.ctsdir} {input.r1} {input.r2} &>> {log} && gzip {output.ctsdir}/abundance.tsv && ln -fs {params.linksf}/abundance.tsv.gz {output.cnts} &>> {log}" 40 | 41 | else: 42 | rule mapping: 43 | input: r1 = expand("TRIMMED_FASTQ/{scombo}/{{file}}_trimmed.fastq.gz", scombo=scombo), 44 | uidx = rules.kallisto_index.output.uidx[0] 45 | output: cnts = report("COUNTS/{combo}/{file}_counts.gz", category="COUNTING"), 46 | ctsdir = report(directory("COUNTS/{combo}/{file}"), category="COUNTING") 47 | log: "LOGS/{combo}/{file}/kallistoquant.log" 48 | conda: ""+COUNTENV+".yaml" 49 | threads: MAXTHREAD 50 | params: cpara = lambda wildcards: tool_params(wildcards.file, None, config, 'COUNTING', COUNTENV)['OPTIONS'].get('COUNT', ""), 51 | mapp=COUNTBIN, 52 | stranded = lambda x: '--fr-stranded' if (stranded == 'fr' or stranded == 'ISF') else '--rf-stranded' if (stranded == 'rf' or stranded == 'ISR') else '', 53 | linksf = lambda wildcards, output: str(os.path.abspath(output.ctsdir)) 54 | shell: "set +euo pipefail; {params.mapp} quant -t {threads} -i {input.uidx} {params.stranded} {params.cpara} -o {output.ctsdir} --single {input.r1} &>> {log} && gzip {output.ctsdir}/abundance.tsv && ln -fs {params.linksf}/abundance.tsv.gz {output.cnts} &>> {log}" 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from collections import defaultdict 5 | from glob import glob 6 | 7 |
from setuptools import find_packages, setup 8 | 9 | import versioneer 10 | 11 | NAME = "MONSDA" 12 | DESCRIPTION = ( 13 | "MONSDA, Modular Organizer of Nextflow and Snakemake driven HTS Data Analysis" 14 | ) 15 | # Set __version__ done by versioneer 16 | # exec(open("MONSDA/__init__.py").read()) 17 | 18 | 19 | def generate_datafiles(): 20 | df = list() 21 | dirlist = defaultdict(list) 22 | 23 | libs = list() 24 | for l in glob("MONSDA/lib/*"): 25 | if any(x in l for x in [".pl", ".pm", ".py", ".sh", ".R", ".groovy"]): 26 | libs.append(os.path.relpath(l)) 27 | for l in libs: 28 | dirlist[str(os.path.join("share", os.path.dirname(l)))].append(l) 29 | 30 | scripts = list() 31 | for s in glob("scripts/**", recursive=True): 32 | if any(x in s for x in [".pl", ".pm", ".py", ".sh", ".R"]): 33 | scripts.append(os.path.relpath(s)) # os.path.join(s, os.path.split(s)[1])) 34 | for s in scripts: 35 | dirlist[str(os.path.join("share", "MONSDA", os.path.dirname(s)))].append(s) 36 | 37 | workflows = list() 38 | for d in glob("workflows/*"): 39 | if "wip" not in d: 40 | workflows.append( 41 | os.path.relpath(d) 42 | ) # os.path.join(d, os.path.split(d)[1])) 43 | for w in workflows: 44 | dirlist[os.path.join("share", "MONSDA", os.path.dirname(w))].append(w) 45 | 46 | envs = list() 47 | for e in glob("envs/*"): 48 | envs.append(os.path.relpath(e)) # os.path.join(d, os.path.split(d)[1])) 49 | for e in envs: 50 | dirlist[os.path.join("share", "MONSDA", os.path.dirname(e))].append(e) 51 | 52 | confs = list() 53 | for c in glob("configs/*"): 54 | if any(x in c for x in [".json"]): 55 | confs.append(os.path.relpath(c)) # os.path.join(d, os.path.split(d)[1])) 56 | for c in confs: 57 | dirlist[os.path.join("share", "MONSDA", os.path.dirname(c))].append(c) 58 | 59 | profiles = list() 60 | for p in glob("profile_*/**"): 61 | profiles.append(os.path.relpath(p)) # os.path.join(d, os.path.split(d)[1])) 62 | for p in profiles: 63 | dirlist[os.path.join("share", "MONSDA", os.path.dirname(p))].append(p) 64 | 65 | dirlist[""].append("LICENSE") 66 | 67 | for k, v in dirlist.items(): 68 | df.append((k, v)) 69 | 70 | return df 71 | 72 | 73 | # requires = open(os.path.abspath("requirements.txt")).read().strip().split("\n") 74 | 75 | setup( 76 | name=NAME, 77 | version=versioneer.get_version(), 78 | cmdclass=versioneer.get_cmdclass(), 79 | description=DESCRIPTION, 80 | author="Joerg Fallmann", 81 | author_email="fall@bioinf.uni-leipzig.de", 82 | packages=find_packages(include=["MONSDA", "MONSDA.*"]), 83 | include_package_data=True, 84 | data_files=generate_datafiles(), 85 | entry_points={ 86 | "console_scripts": [ 87 | "monsda = MONSDA.RunMONSDA:main", 88 | "monsda_configure = MONSDA.Configurator:main", 89 | ] 90 | }, 91 | # install_requires=requires, 92 | install_requires=[ 93 | "biopython>=1.83", 94 | "snakemake>=8.16.0", 95 | "black>=21.5b2", 96 | "flake8>=3.8.3", 97 | "isort>=5.13.2", 98 | "sphinx>=4.1.0", 99 | ], 100 | python_requires=">=3.12.0", 101 | setup_requires=["pytest-runner"], 102 | tests_require=["pytest"], 103 | zip_safe=False, 104 | license="LICENSE", 105 | url="https://github.com/jfallmann/MONSDA", 106 | long_description_content_type="text/markdown", 107 | long_description=open("README.md").read(), 108 | ) 109 | -------------------------------------------------------------------------------- /scripts/lib/Logger.py: -------------------------------------------------------------------------------- 1 | # logger.py --- 2 | # 3 | # Filename: logger.py 4 | # Description: 5 | # Author: Joerg Fallmann 6 | # Maintainer: 
7 | # Created: Mon Aug 12 10:26:55 2019 (+0200) 8 | # Version: 9 | # Package-Requires: () 10 | # Last-Updated: Tue Sep 24 16:53:41 2019 (+0200) 11 | # By: Joerg Fallmann 12 | # Update #: 64 13 | # URL: 14 | # Doc URL: 15 | # Keywords: 16 | # Compatibility: 17 | # 18 | # 19 | 20 | # Commentary: 21 | # 22 | # 23 | # 24 | # 25 | 26 | # Change Log: 27 | # 28 | # 29 | # 30 | # 31 | # This program is free software: you can redistribute it and/or modify 32 | # it under the terms of the GNU General Public License as published by 33 | # the Free Software Foundation, either version 3 of the License, or (at 34 | # your option) any later version. 35 | # 36 | # This program is distributed in the hope that it will be useful, but 37 | # WITHOUT ANY WARRANTY; without even the implied warranty of 38 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 39 | # General Public License for more details. 40 | # 41 | # You should have received a copy of the GNU General Public License 42 | # along with GNU Emacs. If not, see . 43 | # 44 | # 45 | 46 | # Code: 47 | import logging 48 | import multiprocessing 49 | import os 50 | import sys 51 | import inspect 52 | import traceback as tb 53 | 54 | 55 | def makelogdir(logdir): 56 | if not os.path.isabs(logdir): 57 | logdir = os.path.abspath(logdir) 58 | if not os.path.exists(logdir): 59 | os.makedirs(logdir) 60 | return logdir 61 | 62 | 63 | def setup_logger( 64 | name, log_file, filemode="w", logformat=None, datefmt=None, level="WARNING" 65 | ): 66 | """Function setup as many loggers as you want""" 67 | 68 | logger = logging.getLogger(name) 69 | if log_file != "stdout" and log_file != "stderr": 70 | makelogdir(os.path.dirname(log_file)) 71 | handler = logging.FileHandler(log_file, mode=filemode) 72 | else: 73 | handler = logging.StreamHandler() 74 | 75 | handler.setFormatter(logging.Formatter(fmt=logformat, datefmt=datefmt)) 76 | 77 | logger.setLevel(level) 78 | logger.addHandler(handler) 79 | 80 | return logger 81 | 82 | 83 | if __name__ == "__main__": 84 | try: 85 | # set up logging to file 86 | logging = setup_logger( 87 | name="", 88 | log_file="stderr", 89 | logformat="%(asctime)s %(name)-12s %(levelname)-8s %(message)s", 90 | datefmt="%m-%d %H:%M", 91 | level="WARNING", 92 | ) 93 | 94 | # define a Handler which writes INFO messages or higher to the sys.stderr 95 | # console = logging.StreamHandler() 96 | # console.setLevel(logging.INFO) 97 | # set a format which is simpler for console use 98 | # formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s') 99 | # tell the handler to use this format 100 | # console.setFormatter(formatter) 101 | # add the handler to the root logger 102 | # logging.getLogger('').addHandler(console) 103 | 104 | # Now, we can log to the root logger, or any other logger. First the root... 
105 | # logging.info('Imported logger.py') 106 | # Now, use this in code defining a couple of other loggers which might represent areas in your 107 | # application, e.g.: 108 | # log = logging.getLogger('logger.main') 109 | 110 | except Exception as err: 111 | exc_type, exc_value, exc_tb = sys.exc_info() 112 | tbe = tb.TracebackException( 113 | exc_type, 114 | exc_value, 115 | exc_tb, 116 | ) 117 | logging.error("".join(tbe.format())) 118 | 119 | 120 | # log.py ends here 121 | -------------------------------------------------------------------------------- /workflows/segemehl3_bisulfite.smk: -------------------------------------------------------------------------------- 1 | MAPPERBIN, MAPPERENV = env_bin_from_config(config,'MAPPING') 2 | keydict = sub_dict(tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'], ['INDEX']) 3 | keydict["REF"] = REFERENCE 4 | keydict["DECOY"] = DECOY 5 | keydict["ENV"] = MAPPERENV 6 | unik = get_dict_hash(keydict) 7 | 8 | rule generate_index: 9 | input: fa = REFERENCE 10 | output: idx1 = INDEX, 11 | idx2 = INDEX2, 12 | uidx1 = expand("{refd}/INDICES/{mape}_{unikey}.idx", refd=REFDIR, mape=MAPPERENV, unikey=unik), 13 | uidx2 = expand("{refd}/INDICES/{mape}_{unikey}.idx2", refd=REFDIR, mape=MAPPERENV, unikey=unik+'_bs') 14 | log: expand("LOGS/{sets}/{mape}.idx.log", sets=SETS, mape=MAPPERENV) 15 | conda: ""+MAPPERENV.replace('bisulfite', '')+".yaml" 16 | threads: MAXTHREAD 17 | params: indexer = MAPPERBIN, 18 | ipara = lambda wildcards, input: tool_params(SAMPLES[0], None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('INDEX', ""), 19 | linkidx1 = lambda wildcards, output: str(os.path.abspath(output.uidx1[0])), 20 | linkidx2 = lambda wildcards, output: str(os.path.abspath(output.uidx2[0])) 21 | shell: "{params.indexer} --threads {threads} {params.ipara} -d {input.fa} -x {output.uidx1} -y {output.uidx2} &> {log} && ln -fs {params.linkidx1} {output.idx1} && ln -fs {params.linkidx2} {output.idx2}" 22 | 23 | if paired == 'paired': 24 | rule mapping: 25 | input: r1 = "TRIMMED_FASTQ/{combo}/{file}_R1_trimmed.fastq.gz", 26 | r2 = "TRIMMED_FASTQ/{combo}/{file}_R2_trimmed.fastq.gz", 27 | uidx1 = rules.generate_index.output.uidx1[0], 28 | uidx2 = rules.generate_index.output.uidx2[0], 29 | fa = REFERENCE 30 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 31 | unmapped1 = "UNMAPPED/{combo}/{file}_R1_unmapped.fastq.gz", 32 | unmapped2 = "UNMAPPED/{combo}/{file}_R2_unmapped.fastq.gz" 33 | log: "LOGS/{combo}/{file}/mapping.log" 34 | conda: ""+MAPPERENV.replace('bisulfite', '')+".yaml" 35 | threads: MAXTHREAD 36 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 37 | mapp=MAPPERBIN 38 | shell: "set +o pipefail;{params.mapp} {params.mpara} -d {input.fa} -i {input.uidx1} -j {input.uidx2} -q {input.r1} -p {input.r2} --threads {threads} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools collate -u -O -|samtools fastq -n -c 6 -1 {output.unmapped1} -2 {output.unmapped2} - ) 2>> {log} &>/dev/null && touch {output.unmapped1} {output.unmapped2}" 39 | else: 40 | rule mapping: 41 | input: query = "TRIMMED_FASTQ/{combo}/{file}_trimmed.fastq.gz", 42 | uidx1 = rules.generate_index.output.uidx1[0], 43 | uidx2= rules.generate_index.output.uidx2[0], 44 | fa = REFERENCE 45 | output: mapped = temp(report("MAPPED/{combo}/{file}_mapped.sam.gz", category="MAPPING")), 46 | unmapped = 
"UNMAPPED/{combo}/{file}_unmapped.fastq.gz" 47 | log: "LOGS/{combo}/{file}/mapping.log" 48 | conda: ""+MAPPERENV.replace('bisulfite', '')+".yaml" 49 | threads: MAXTHREAD 50 | params: mpara = lambda wildcards: tool_params(wildcards.file, None, config, 'MAPPING', MAPPERENV)['OPTIONS'].get('MAP', ""), 51 | mapp=MAPPERBIN 52 | shell: "set +o pipefail; {params.mapp} {params.mpara} -d {input.fa} -i {input.uidx1} -j {input.uidx2} -q {input.query} --threads {threads} 2> {log}| tee >(samtools view -h -F 4 |gzip > {output.mapped}) >(samtools view -h -f 4 |samtools fastq -n --verbosity 0 - | pigz > {output.unmapped}) 2>> {log} &>/dev/null && touch {output.unmapped}" 53 | -------------------------------------------------------------------------------- /envs/dexseq_DTU.yaml: -------------------------------------------------------------------------------- 1 | name: dexseq 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bioconductor-annotate =1.76.0 9 | - bioconductor-annotationdbi =1.60.0 10 | - bioconductor-biobase =2.58.0 11 | - bioconductor-biocfilecache =2.6.0 12 | - bioconductor-biocgenerics =0.44.0 13 | - bioconductor-biocio =1.8.0 14 | - bioconductor-biocparallel =1.32.5 15 | - bioconductor-biomart =2.54.0 16 | - bioconductor-biostrings =2.66.0 17 | - bioconductor-data-packages =20230202 18 | - bioconductor-delayedarray =0.24.0 19 | - bioconductor-deseq2 =1.38.0 20 | - bioconductor-dexseq =1.44.0 21 | - bioconductor-drimseq =1.26.0 22 | - bioconductor-edger =3.40.0 23 | - bioconductor-genefilter =1.80.0 24 | - bioconductor-geneplotter =1.76.0 25 | - bioconductor-genomeinfodb =1.34.8 26 | - bioconductor-genomeinfodbdata =1.2.9 27 | - bioconductor-genomicalignments =1.34.0 28 | - bioconductor-genomicfeatures =1.50.2 29 | - bioconductor-genomicranges =1.50.0 30 | - bioconductor-iranges =2.32.0 31 | - bioconductor-keggrest =1.38.0 32 | - bioconductor-limma =3.54.0 33 | - bioconductor-matrixgenerics =1.10.0 34 | - bioconductor-rhtslib =2.0.0 35 | - bioconductor-rsamtools =2.14.0 36 | - bioconductor-rtracklayer =1.58.0 37 | - bioconductor-s4vectors =0.36.0 38 | - bioconductor-summarizedexperiment =1.28.0 39 | - bioconductor-tximport =1.26.0 40 | - bioconductor-xvector =0.38.0 41 | - bioconductor-zlibbioc =1.44.0 42 | - python =3.10.9 43 | - python_abi =3.10 44 | - pyyaml =6.0 45 | - r-askpass =1.1 46 | - r-assertthat =0.2.1 47 | - r-base =4.2.0 48 | - r-bh =1.81.0_1 49 | - r-bit =4.0.5 50 | - r-bit64 =4.0.5 51 | - r-bitops =1.0_7 52 | - r-blob =1.2.3 53 | - r-cachem =1.0.6 54 | - r-cli =3.6.0 55 | - r-codetools =0.2_19 56 | - r-colorspace =2.1_0 57 | - r-cpp11 =0.4.3 58 | - r-crayon =1.5.2 59 | - r-curl =4.3.3 60 | - r-dbi =1.1.3 61 | - r-dbplyr =2.3.0 62 | - r-digest =0.6.31 63 | - r-dplyr =1.1.0 64 | - r-ellipsis =0.3.2 65 | - r-fansi =1.0.4 66 | - r-farver =2.1.1 67 | - r-fastmap =1.1.0 68 | - r-filelock =1.0.2 69 | - r-formatr =1.14 70 | - r-futile.logger =1.4.3 71 | - r-futile.options =1.0.1 72 | - r-generics =0.1.3 73 | - r-ggplot2 =3.4.1 74 | - r-glue =1.6.2 75 | - r-gtable =0.3.1 76 | - r-hms =1.1.2 77 | - r-httr =1.4.4 78 | - r-hwriter =1.3.2.1 79 | - r-isoband =0.2.7 80 | - r-jsonlite =1.8.4 81 | - r-labeling =0.4.2 82 | - r-lambda.r =1.2.4 83 | - r-lattice =0.20_45 84 | - r-lifecycle =1.0.3 85 | - r-locfit =1.5_9.7 86 | - r-magrittr =2.0.3 87 | - r-mass =7.3_58.2 88 | - r-matrix =1.5_3 89 | - r-matrixstats =0.63.0 90 | - r-memoise =2.0.1 91 | - r-mgcv =1.8_41 92 | - r-mime =0.12 93 | - r-munsell =0.5.0 94 | - r-nlme =3.1_157 95 | - r-openssl 
=2.0.5 96 | - r-pillar =1.8.1 97 | - r-pkgconfig =2.0.3 98 | - r-plogr =0.2.0 99 | - r-plyr =1.8.8 100 | - r-png =0.1_8 101 | - r-prettyunits =1.1.1 102 | - r-progress =1.2.2 103 | - r-purrr =1.0.1 104 | - r-r6 =2.5.1 105 | - r-rappdirs =0.3.3 106 | - r-rcolorbrewer =1.1_3 107 | - r-rcpp =1.0.10 108 | - r-rcpparmadillo =0.11.4.4.0 109 | - r-rcurl =1.98_1.10 110 | - r-reshape2 =1.4.4 111 | - r-restfulr =0.0.15 112 | - r-rjson =0.2.21 113 | - r-rlang =1.0.6 114 | - r-rsqlite =2.2.20 115 | - r-scales =1.2.1 116 | - r-snow =0.4_4 117 | - r-statmod =1.4.36 118 | - r-stringi =1.7.6 119 | - r-stringr =1.5.0 120 | - r-survival =3.5_3 121 | - r-sys =3.4.1 122 | - r-tibble =3.1.8 123 | - r-tidyselect =1.2.0 124 | - r-utf8 =1.2.3 125 | - r-vctrs =0.5.2 126 | - r-viridislite =0.4.1 127 | - r-withr =2.5.0 128 | - r-xml =3.99_0.9 129 | - r-xml2 =1.3.3 130 | - r-xtable =1.8_4 131 | - r-yaml =2.3.7 132 | - readline =8.2 133 | - sed =4.8 -------------------------------------------------------------------------------- /envs/isoformswitchanalyzer.yaml: -------------------------------------------------------------------------------- 1 | name: isoformswitchanalyzer 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | - r 7 | dependencies: 8 | - bioconductor-annotate =1.66.0 9 | - bioconductor-annotationdbi =1.50.0 10 | - bioconductor-biobase =2.48.0 11 | - bioconductor-biocfilecache =1.12.0 12 | - bioconductor-biocgenerics =0.34.0 13 | - bioconductor-biocparallel =1.22.0 14 | - bioconductor-biomart =2.44.0 15 | - bioconductor-biostrings =2.56.0 16 | - bioconductor-bsgenome =1.56.0 17 | - bioconductor-delayedarray =0.14.0 18 | - bioconductor-deseq2 =1.28.0 19 | - bioconductor-dexseq =1.34.0 20 | - bioconductor-drimseq =1.16.0 21 | - bioconductor-edger =3.30.0 22 | - bioconductor-genefilter =1.70.0 23 | - bioconductor-geneplotter =1.66.0 24 | - bioconductor-genomeinfodb =1.24.0 25 | - bioconductor-genomeinfodbdata =1.2.3 26 | - bioconductor-genomicalignments =1.24.0 27 | - bioconductor-genomicranges =1.40.0 28 | - bioconductor-iranges =2.22.1 29 | - bioconductor-isoformswitchanalyzer =1.10.0 30 | - bioconductor-limma =3.44.1 31 | - bioconductor-rhtslib =1.20.0 32 | - bioconductor-rsamtools =2.4.0 33 | - bioconductor-rtracklayer =1.48.0 34 | - bioconductor-s4vectors =0.26.0 35 | - bioconductor-summarizedexperiment =1.18.1 36 | - bioconductor-tximport =1.16.0 37 | - bioconductor-xvector =0.28.0 38 | - bioconductor-zlibbioc =1.34.0 39 | - r-askpass =1.1 40 | - r-assertthat =0.2.1 41 | - r-backports =1.1.8 42 | - r-base =4.0.2 43 | - r-bh =1.72.0_3 44 | - r-bit =4.0.4 45 | - r-bit64 =4.0.2 46 | - r-bitops =1.0_6 47 | - r-blob =1.2.1 48 | - r-callr =3.4.3 49 | - r-cli =2.0.2 50 | - r-clipr =0.7.0 51 | - r-colorspace =1.4_1 52 | - r-crayon =1.3.4 53 | - r-curl =4.3 54 | - r-dbi =1.1.0 55 | - r-dbplyr =1.4.4 56 | - r-desc =1.2.0 57 | - r-digest =0.6.25 58 | - r-dplyr =1.0.2 59 | - r-ellipsis =0.3.1 60 | - r-evaluate =0.14 61 | - r-fansi =0.4.1 62 | - r-farver =2.0.3 63 | - r-formatr =1.7 64 | - r-futile.logger =1.4.3 65 | - r-futile.options =1.0.1 66 | - r-generics =0.0.2 67 | - r-ggplot2 =3.3.2 68 | - r-glue =1.4.1 69 | - r-gridextra =2.3 70 | - r-gtable =0.3.0 71 | - r-hms =0.5.3 72 | - r-httr =1.4.2 73 | - r-hwriter =1.3.2 74 | - r-isoband =0.2.2 75 | - r-jsonlite =1.7.0 76 | - r-labeling =0.3 77 | - r-lambda.r =1.2.4 78 | - r-lattice =0.20_41 79 | - r-lifecycle =0.2.0 80 | - r-locfit =1.5_9.4 81 | - r-magrittr =1.5 82 | - r-mass =7.3_52 83 | - r-matrix =1.2_18 84 | - r-matrixstats =0.56.0 85 | - 
r-memoise =1.1.0 86 | - r-mgcv =1.8_32 87 | - r-mime =0.9 88 | - r-munsell =0.5.0 89 | - r-nlme =3.1_149 90 | - r-openssl =1.4.2 91 | - r-pillar =1.4.6 92 | - r-pkgbuild =1.1.0 93 | - r-pkgconfig =2.0.3 94 | - r-pkgload =1.1.0 95 | - r-plogr =0.2.0 96 | - r-plyr =1.8.6 97 | - r-praise =1.0.0 98 | - r-prettyunits =1.1.1 99 | - r-processx =3.4.3 100 | - r-progress =1.2.2 101 | - r-ps =1.3.4 102 | - r-purrr =0.3.4 103 | - r-r6 =2.4.1 104 | - r-rappdirs =0.3.1 105 | - r-rcolorbrewer =1.1_2 106 | - r-rcpp =1.0.4.6 107 | - r-rcpparmadillo =0.9.900.2.0 108 | - r-rcurl =1.98_1.2 109 | - r-readr =1.3.1 110 | - r-reshape2 =1.4.4 111 | - r-rlang =0.4.7 112 | - r-rprojroot =1.3_2 113 | - r-rsqlite =2.2.0 114 | - r-rstudioapi =0.11 115 | - r-scales =1.1.1 116 | - r-snow =0.4_3 117 | - r-statmod =1.4.34 118 | - r-stringi =1.4.6 119 | - r-stringr =1.4.0 120 | - r-survival =3.2_3 121 | - r-sys =3.4 122 | - r-testthat =2.3.2 123 | - r-tibble =3.0.3 124 | - r-tidyselect =1.1.0 125 | - r-utf8 =1.1.4 126 | - r-vctrs =0.3.2 127 | - r-venndiagram =1.6.20 128 | - r-viridislite =0.3.0 129 | - r-withr =2.2.0 130 | - r-xml =3.99_0.3 131 | - r-xtable =1.8_4 132 | - r-zeallot =0.1.0 133 | - readline =8.2 134 | - sed =4.8 -------------------------------------------------------------------------------- /workflows/fastqc.nf: -------------------------------------------------------------------------------- 1 | QCENV=get_always('QCENV') 2 | QCBIN=get_always('QCBIN') 3 | QCPARAMS = get_always('fastqc_params_QC') ?: '' 4 | 5 | //QC RAW 6 | process qc_raw{ 7 | conda "$QCENV"+".yaml" 8 | cpus THREADS 9 | cache 'lenient' 10 | //validExitStatus 0,1 11 | 12 | publishDir "${workflow.workDir}/../" , mode: 'link', 13 | saveAs: {filename -> 14 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 15 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 16 | else null 17 | } 18 | 19 | input: 20 | path read 21 | 22 | output: 23 | path "*.{zip,html}", emit: fastqc_results 24 | 25 | script: 26 | """ 27 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 28 | """ 29 | } 30 | 31 | workflow QC_RAW{ 32 | take: 33 | collection 34 | 35 | main: 36 | 37 | qc_raw(samples_ch) 38 | 39 | emit: 40 | qc = qc_raw.out.fastqc_results 41 | } 42 | 43 | //QC TRIM 44 | 45 | process qc_trimmed{ 46 | conda "$QCENV"+".yaml" 47 | cpus THREADS 48 | cache 'lenient' 49 | //validExitStatus 0,1 50 | 51 | publishDir "${workflow.workDir}/../" , mode: 'link', 52 | saveAs: {filename -> 53 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 54 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 55 | else null 56 | } 57 | 58 | input: 59 | path read 60 | 61 | output: 62 | path "*.{zip,html}", emit: fastqc_results 63 | 64 | script: 65 | """ 66 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 67 | """ 68 | } 69 | 70 | workflow QC_TRIMMING{ 71 | take: collection 72 | 73 | main: 74 | 75 | qc_trimmed(collection) 76 | 77 | emit: 78 | qc = qc_trimmed.out.fastqc_results 79 | } 80 | 81 | 82 | //QC MAP 83 | 84 | process qc_mapped{ 85 | conda "$QCENV"+".yaml" 86 | cpus THREADS 87 | cache 'lenient' 88 | //validExitStatus 0,1 89 | 90 | publishDir "${workflow.workDir}/../" , mode: 'link', 91 | saveAs: {filename -> 92 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 93 | else if 
(filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 94 | else null 95 | } 96 | 97 | input: 98 | path map 99 | 100 | output: 101 | path "*.{zip,html}", emit: fastqc_results 102 | 103 | script: 104 | """ 105 | fastqc --quiet -t ${task.cpus} $QCPARAMS -f bam $map 106 | """ 107 | } 108 | 109 | workflow QC_MAPPING{ 110 | take: collection 111 | main: 112 | 113 | qc_mapped(collection) 114 | 115 | emit: 116 | qc = qc_mapped.out.fastqc_results 117 | } 118 | 119 | // DEDUP QC 120 | 121 | process qc_dedup{ 122 | conda "$QCENV"+".yaml" 123 | cpus THREADS 124 | cache 'lenient' 125 | //validExitStatus 0,1 126 | 127 | publishDir "${workflow.workDir}/../" , mode: 'link', 128 | saveAs: {filename -> 129 | if (filename.indexOf("zip") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.zip" 130 | else if (filename.indexOf("html") > 0) "QC/${COMBO}/${CONDITION}/${file(filename).getSimpleName()}.html" 131 | else null 132 | } 133 | 134 | input: 135 | path read 136 | 137 | output: 138 | path "*.{zip,html}", emit: fastqc_results 139 | 140 | script: 141 | """ 142 | fastqc --quiet -t ${task.cpus} $QCPARAMS --noextract -f fastq $read 143 | """ 144 | } 145 | 146 | workflow QC_DEDUP{ 147 | take: collection 148 | 149 | main: 150 | 151 | qc_dedup(collection) 152 | 153 | emit: 154 | qc = qc_dedup.out.fastqc_results 155 | } 156 | --------------------------------------------------------------------------------