├── NanOlympicsMod_logo.png
├── Docker
    ├── xpore-2.0
    │   ├── .DS_Store
    │   └── Dockerfile
    ├── poretools-0.6.0
    │   └── Dockerfile
    ├── m6anet-v1.1.0
    │   └── Dockerfile
    ├── nanocompore-1.0.3
    │   └── Dockerfile
    ├── ont_fast5_api-4.0.0
    │   └── Dockerfile
    ├── minimap2-2.24.0
    │   └── Dockerfile
    ├── nanopolish-0.8.4
    │   └── Dockerfile
    ├── differr-0.2
    │   └── Dockerfile
    ├── tombo-1.5.1
    │   └── Dockerfile
    ├── postprocessing-0.3
    │   └── Dockerfile
    ├── drummer-28_02_2022
    │   └── Dockerfile
    ├── yanocomp-0.2
    │   └── Dockerfile
    ├── mines-23_02_2022
    │   └── Dockerfile
    ├── nanom6a-22_10_2021
    │   └── Dockerfile
    ├── eligos-2.1.0
    │   └── Dockerfile
    ├── dena-23_02_2022
    │   └── Dockerfile
    ├── epinano-1.2
    │   └── Dockerfile
    └── nanodoc-28_02_2022
    │   └── Dockerfile
├── NanOlympicsMod_tutorial.pdf
├── Scripts
    ├── m6A_peaks_yeast_SK1_liftover.pdf
    ├── nextflowRun.sh
    ├── convert_m6a_to_sk1.py
    ├── statistical_analysis.R
    └── postprocessing.R
├── README.md
├── pipeline.conf
├── LICENSE
└── pipeline.nf


/NanOlympicsMod_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mfurla/NanOlympicsMod/HEAD/NanOlympicsMod_logo.png


--------------------------------------------------------------------------------
/Docker/xpore-2.0/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mfurla/NanOlympicsMod/HEAD/Docker/xpore-2.0/.DS_Store


--------------------------------------------------------------------------------
/NanOlympicsMod_tutorial.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mfurla/NanOlympicsMod/HEAD/NanOlympicsMod_tutorial.pdf


--------------------------------------------------------------------------------
/Scripts/m6A_peaks_yeast_SK1_liftover.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mfurla/NanOlympicsMod/HEAD/Scripts/m6A_peaks_yeast_SK1_liftover.pdf


--------------------------------------------------------------------------------
/Docker/poretools-0.6.0/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:22.04
2 | # Image providing poretools from the Ubuntu package archive.
3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
4 | # Refresh the package index once, install, then drop the index files to keep the layer small.
5 | RUN export DEBIAN_FRONTEND=noninteractive \
6 | 	&& apt-get update \
7 | 	&& apt-get install -y poretools \
8 | 	&& rm -rf /var/lib/apt/lists/*


--------------------------------------------------------------------------------
/Docker/m6anet-v1.1.0/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | # Refresh the index and install in the same layer so a cached, stale index is never reused.
 6 | RUN apt-get update \
 7 | 	&& apt-get install -y pip git
 8 | # NOTE(review): m6anet is installed unpinned although the directory is named m6anet-v1.1.0 - confirm the intended release.
 9 | RUN pip install m6anet
10 | 
11 | RUN git clone https://github.com/GoekeLab/m6anet.git
12 | 


--------------------------------------------------------------------------------
/Docker/xpore-2.0/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | # Image providing xpore, installed from PyPI on top of Ubuntu 22.04.
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | # Install pip; DEBIAN_FRONTEND is exported for this RUN only, so apt never prompts.
 5 | RUN export DEBIAN_FRONTEND=noninteractive \
 6 |     && apt-get update -qq \
 7 |     && apt-get install -y \
 8 |     python3-pip
 9 | # NOTE(review): no version is pinned although the directory is named xpore-2.0 - confirm the intended release.
10 | RUN pip3 install xpore
11 | 


--------------------------------------------------------------------------------
/Docker/nanocompore-1.0.3/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:20.04
 2 | # Image providing nanocompore 1.0.3, installed from PyPI on top of Ubuntu 20.04.
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | # Install pip; DEBIAN_FRONTEND is exported for this RUN only, so apt never prompts.
 5 | RUN export DEBIAN_FRONTEND=noninteractive \
 6 |     && apt-get update -qq \
 7 |     && apt-get install -y \
 8 |     python3-pip
 9 | # The release is pinned, so rebuilding the image installs the same nanocompore version.
10 | RUN pip3 install nanocompore==1.0.3
11 | 


--------------------------------------------------------------------------------
/Scripts/nextflowRun.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # PBS job script launching the NanOlympicsMod pipeline; replace every /path/to placeholder before submitting.
 3 | #PBS -S /bin/bash
 4 | #PBS -N NanOlympicsMod
 5 | #PBS -l select=1:ncpus=1:mem=2G
 6 | #PBS -M emailAddress
 7 | #PBS -m e
 8 | # The job shell must be bash because `source` below is not available in a plain POSIX sh.
 9 | source /path/to/activate /path/to/nextflow/environment
10 | cd /path/to/pipeline/folder || exit 1
11 | nextflow -c pipeline.conf run pipeline.nf -w /path/to/work/folder/
12 | source /path/to/deactivate
13 | 


--------------------------------------------------------------------------------
/Docker/ont_fast5_api-4.0.0/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | # Image providing ont-fast5-api, installed from PyPI on top of Ubuntu 22.04.
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | # Install Python 3, pip and the distribution builds of numpy, h5py, six and progressbar.
 5 | RUN export DEBIAN_FRONTEND=noninteractive \
 6 | 	&& apt-get update && \
 7 | 	apt-get -y install \
 8 | 		autoconf \
 9 | 		python3 \
10 | 		python3-pip \
11 | 		python3-numpy \
12 | 		python3-h5py \
13 | 		python3-six \
14 | 		python3-progressbar
15 | # NOTE(review): no version is pinned although the directory is named ont_fast5_api-4.0.0 - confirm the intended release.
16 | RUN pip install ont-fast5-api
17 | 


--------------------------------------------------------------------------------
/Docker/minimap2-2.24.0/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | # Refresh the index and install in the same layer so a cached, stale index is never reused.
 6 | RUN apt-get update \
 7 | 	&& apt-get install -y curl bzip2 samtools
 8 | # Download and unpack the prebuilt minimap2 v2.24 x64 Linux binaries.
 9 | RUN curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
10 | 
11 | RUN mkdir /bin/minimap2 \
12 | 	&& mv minimap2-2.24_x64-linux/* /bin/minimap2 \
13 | 	&& rm -r minimap2-2.24_x64-linux


--------------------------------------------------------------------------------
/Docker/nanopolish-0.8.4/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | # Image that builds nanopolish from source on top of Ubuntu 22.04.
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | # Build prerequisites: git, the compiler toolchain, wget and the zlib development package.
 5 | RUN export DEBIAN_FRONTEND=noninteractive \
 6 | 	&& apt-get update && \
 7 | 	apt-get -y install \
 8 | 		git \
 9 | 		build-essential \
10 | 		wget \
11 | 		libz-dev
12 | # NOTE(review): the clone takes the default branch, not a tagged release, although the directory is named nanopolish-0.8.4 - confirm the intended version.
13 | RUN git clone --recursive https://github.com/jts/nanopolish.git \
14 | 	&& cd nanopolish \
15 | 	&& make
16 | # Move the whole build tree under /bin.
17 | RUN mv /nanopolish /bin
18 | 


--------------------------------------------------------------------------------
/Docker/differr-0.2/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | # Refresh the index and install in the same layer so a cached, stale index is never reused.
 6 | RUN export DEBIAN_FRONTEND=noninteractive \
 7 | 	&& apt-get update \
 8 | 	&& apt-get install -y python3-pip wget
 9 | 
10 | RUN wget https://github.com/bartongroup/differr_nanopore_DRS/archive/refs/tags/0.2.tar.gz \
11 | 	&& tar -xvzf 0.2.tar.gz \
12 | 	&& rm 0.2.tar.gz
13 | 
14 | RUN cd differr_nanopore_DRS-0.2 \
15 | 	&& python3 setup.py build \
16 | 	&& python3 setup.py install


--------------------------------------------------------------------------------
/Docker/tombo-1.5.1/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | # Refresh the index and install in the same layer so a cached, stale index is never reused.
 6 | RUN apt-get update \
 7 | 	&& apt-get install -y wget
 8 | # Download the Miniconda installer (Python 3.7 build), then install it under /bin/miniconda3.
 9 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.10.3-Linux-x86_64.sh
10 | 
11 | RUN chmod 777 Miniconda3-py37_4.10.3-Linux-x86_64.sh
12 | 
13 | RUN ./Miniconda3-py37_4.10.3-Linux-x86_64.sh -b -p /bin/miniconda3
14 | 
15 | RUN /bin/miniconda3/bin/conda install --yes -c anaconda numpy=1.11.3
16 | 
17 | RUN /bin/miniconda3/bin/conda install --yes -c bioconda ont-tombo


--------------------------------------------------------------------------------
/Docker/postprocessing-0.3/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | ENV DEBIAN_FRONTEND=noninteractive
 6 | # R plus the system libraries needed to compile the R packages below.
 7 | RUN apt-get update -qq && \
 8 | 	apt-get install -y \
 9 | 	r-base \
10 | 	libcurl4-openssl-dev \
11 | 	libxml2-dev \
12 | 	libssl-dev
13 | # R packages: xml2, BiocManager and pheatmap via install.packages; IRanges, GenomicRanges and ensembldb via BiocManager.
14 | RUN R -e "install.packages('xml2')"
15 | RUN R -e "install.packages('BiocManager')"
16 | RUN R -e "BiocManager::install('IRanges')"
17 | RUN R -e "BiocManager::install('GenomicRanges')"
18 | RUN R -e "BiocManager::install('ensembldb')"
19 | RUN R -e "install.packages('pheatmap')"
20 | 


--------------------------------------------------------------------------------
/Docker/drummer-28_02_2022/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | # Image providing a clone of DRUMMER together with samtools, bedtools and Python libraries.
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | # System packages are installed in one layer, and the apt cache and index files are removed in that same layer.
 5 | RUN export DEBIAN_FRONTEND=noninteractive \
 6 |     && apt-get update -qq \
 7 |     && apt-get install -y \
 8 |     build-essential \
 9 |     wget \
10 |     unzip \
11 |     bzip2 \
12 |     git \
13 |     libidn11* \
14 |     python3-pip \
15 |     samtools \
16 |     bedtools \
17 |     && apt-get clean \
18 |     && rm -rf /var/lib/apt/lists/*
19 | # Python libraries are installed unpinned; DRUMMER is cloned from the repository's default branch.
20 | RUN pip3 install seaborn scipy pandas numpy biopython matplotlib statsmodels
21 | RUN git clone https://github.com/DepledgeLab/DRUMMER.git


--------------------------------------------------------------------------------
/Docker/yanocomp-0.2/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:20.04
 2 | # Image providing the yanocomp v0.2 sources and a conda environment created from its yanocomp.yml.
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | # Install the compiler toolchain and wget; DEBIAN_FRONTEND is exported for this RUN only.
 5 | RUN export DEBIAN_FRONTEND=noninteractive \
 6 |     && apt-get update -qq \
 7 |     && apt-get install -y \
 8 |     build-essential \
 9 |     wget
10 | # Fetch and unpack the v0.2 release archive, then delete the tarball.
11 | RUN wget https://github.com/bartongroup/yanocomp/archive/refs/tags/v0.2.tar.gz \
12 |     && tar -xvzf v0.2.tar.gz \
13 |     && rm v0.2.tar.gz
14 | # Install Miniconda under /bin/miniconda3 and create the environment described by /yanocomp-0.2/yanocomp.yml.
15 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py38_4.10.3-Linux-x86_64.sh \
16 |     && chmod 777 Miniconda3-py38_4.10.3-Linux-x86_64.sh \
17 |     && ./Miniconda3-py38_4.10.3-Linux-x86_64.sh -b -p /bin/miniconda3 \
18 |     && /bin/miniconda3/bin/conda env create -f /yanocomp-0.2/yanocomp.yml


--------------------------------------------------------------------------------
/Docker/mines-23_02_2022/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | # Image providing a clone of MINES and the conda environment created from its environment.yml.
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | # Install wget and git; DEBIAN_FRONTEND is exported for this RUN only.
 5 | RUN export DEBIAN_FRONTEND=noninteractive \
 6 |         && apt-get update && \
 7 |         apt-get -y install \
 8 |         wget \
 9 |         git
10 |         
11 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.10.3-Linux-x86_64.sh
12 | 
13 | RUN chmod 777 Miniconda3-py37_4.10.3-Linux-x86_64.sh
14 | 
15 | RUN ./Miniconda3-py37_4.10.3-Linux-x86_64.sh -b -p /bin/miniconda3
16 | 
17 | RUN git clone https://github.com/YeoLab/MINES.git
18 | # Replace the first `1.1.1=h7b6447c_0` build pin on each line of the environment file with plain `1.1.1` before creating the environment.
19 | RUN sed -i 's/1.1.1=h7b6447c_0/1.1.1/' /MINES/environment.yml
20 | 
21 | RUN /bin/miniconda3/bin/conda env create -f /MINES/environment.yml
22 | # Append the mines environment's bin directory to PATH.
23 | ENV PATH=$PATH:/bin/miniconda3/envs/mines/bin/


--------------------------------------------------------------------------------
/Docker/nanom6a-22_10_2021/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:22.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | # Refresh the index and install in the same layer so a cached, stale index is never reused.
 6 | RUN export DEBIAN_FRONTEND=noninteractive \
 7 | 	&& apt-get update \
 8 | 	&& apt-get install -y wget python3-pip git libncurses5
 9 | 
10 | RUN pip3 install gdown
11 | 
12 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
13 | 	&& chmod 777 Miniconda3-latest-Linux-x86_64.sh \
14 | 	&& ./Miniconda3-latest-Linux-x86_64.sh -b -p /bin/miniconda3
15 | 
16 | RUN /bin/miniconda3/bin/conda install --yes -c hcc jvarkit-sam2tsv
17 | 
18 | RUN gdown https://drive.google.com/uc?id=18HtAc358ks0pcInwS1RUNhWT-vh6k608
19 | 
20 | RUN tar -xvzf nanom6A_2021_10_22.tar.gz \
21 | 	&& rm nanom6A_2021_10_22.tar.gz
22 | 
23 | ENV PATH=$PATH:/bin/miniconda3/bin/
24 | 
25 | RUN git clone https://github.com/broadinstitute/picard.git \
26 | 	&& cd picard/ \
27 | 	&& ./gradlew shadowJar


--------------------------------------------------------------------------------
/Docker/eligos-2.1.0/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | # Refresh the index and install in the same layer so a cached, stale index is never reused.
 6 | RUN export DEBIAN_FRONTEND=noninteractive \
 7 | 	&& apt-get update \
 8 | 	&& apt-get install -y wget
 9 | 
10 | RUN wget https://gitlab.com/piroonj/eligos2/-/archive/v2.1.0/eligos2-v2.1.0.tar.gz \
11 | 	&& tar -xvzf eligos2-v2.1.0.tar.gz \
12 | 	&& rm eligos2-v2.1.0.tar.gz
13 | 
14 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.10.3-Linux-x86_64.sh \
15 | 	&& chmod 777 Miniconda3-py37_4.10.3-Linux-x86_64.sh \
16 | 	&& ./Miniconda3-py37_4.10.3-Linux-x86_64.sh -b -p /bin/miniconda3
17 | 
18 | RUN /bin/miniconda3/bin/conda install --yes -c bioconda -c conda-forge -c anaconda python=3.6 pysam=0.13 pandas=0.23.4 pybedtools=0.8.0 bedtools=2.25 rpy2=2.8.5 r-base=3.4.1 tqdm=4.40.2 numpy=1.11.3
19 | 
20 | RUN /bin/miniconda3/bin/Rscript -e 'install.packages("samplesizeCMH", repos="https://cloud.r-project.org")'
21 | 
22 | ENV PATH=$PATH:/bin/miniconda3/bin/


--------------------------------------------------------------------------------
/Docker/dena-23_02_2022/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:20.04
 2 | 
 3 | RUN export DEBIAN_FRONTEND=noninteractive \
 4 |         && apt-get update && \
 5 |         apt-get -y install \
 6 |                 autoconf \
 7 |                 python3 \
 8 |                 python3-pip \
 9 |                 python3-tk \
10 |                 python3-numpy \
11 |                 python3-scipy \
12 |                 python3-pandas \
13 |                 git \
14 |                 wget \
15 |                 lsb-release \
16 |                 apt-transport-https \
17 |                 vim \
18 |                 zlib1g-dev \
19 |                 libbz2-dev \
20 |                 liblzma-dev \
21 |                 libcurl4-openssl-dev \
22 |                 libidn11 \
23 |                 curl \
24 |                 unzip
25 | # `sklearn` on PyPI is a deprecated placeholder whose installation now fails; the real distribution is scikit-learn.
26 | RUN pip3 install \
27 |     scikit-learn \
28 |     pysam \
29 |     torch \
30 |     torchvision \
31 |     ont-tombo
32 | 
33 | RUN git clone https://github.com/weir12/DENA.git
34 | 
35 | RUN wget https://qpoviq.DM.files.1drv.com/y4mUf8TBx4Ji7PjPRvldPKYBJyfbZSXxo7beR6J-m98d3oPYK3YVr_Ah6zFwtdeq_06pZ0uvCMH-jsRSq5yeg9FA80H-SE_uPamEiVLCDLMe4fqKkVz8z1eMLckE7yvrnLHdf37qAsMInIQVcussqVDw_nxoMCjh1giHfc7lW6HL76cG2c4bW6OViG_GWbbTJyMF3axQbkMqStR6M5Wlrh3NQ -O denaModels.zip \
36 | && mkdir DENA/denaModels/ \
37 | && mv denaModels.zip DENA/denaModels \
38 | && cd DENA/denaModels \
39 | && unzip denaModels.zip


--------------------------------------------------------------------------------
/Docker/epinano-1.2/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | # Refresh the index and install in the same layer so a cached, stale index is never reused;
 6 | # one refresh is enough, so the duplicate `apt update` is gone.
 7 | RUN export DEBIAN_FRONTEND=noninteractive \
 8 | 	&& apt-get update \
 9 | 	&& apt-get install -y wget python3-pip python3-venv gcc git default-jdk samtools curl libxml2-dev libssl-dev libcurl4-openssl-dev
10 | 
11 | RUN pip3 install --upgrade pip
12 | RUN pip3 install atomicwrites==1.4.0 attrs==21.2.0 biopython==1.76 cloudpickle==1.3.0
13 | RUN pip3 install dask==2.5.2 fsspec==2021.6.1 future==0.17.1 h5py==2.10.0 importlib-metadata==4.6.1
14 | RUN pip3 install locket==0.2.1  more-itertools==8.8.0 numpy==1.17.2 pandas==0.24.2
15 | RUN pip3 install partd==1.2.0 pluggy==0.13.1 py==1.10.0 pysam==0.15.4 pytest==4.4.1 python-dateutil==2.8.1
16 | RUN pip3 install pytz==2021.1 scikit-learn==0.20.2 scipy==1.5.4 six==1.16.0 toolz==0.11.1 typing-extensions==3.10.0.0 zipp==3.5.0
17 | 
18 | RUN wget https://github.com/novoalab/EpiNano/archive/refs/tags/Epinano1.2.1.tar.gz \
19 | 	&& tar -xvzf Epinano1.2.1.tar.gz \
20 | 	&& rm Epinano1.2.1.tar.gz
21 | 
22 | RUN git clone "https://github.com/lindenb/jvarkit.git" \
23 | 	&& cd jvarkit \
24 | 	&& ./gradlew sam2tsv
25 | 
26 | RUN sed -i 's/java -jar  {sam2tsv} -r {reference_file}/java -jar  {sam2tsv} -R {reference_file}/' /EpiNano-Epinano1.2.1/Epinano_Variants.py
27 | 
28 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.10.3-Linux-x86_64.sh \
29 | 	&& chmod 777 Miniconda3-py37_4.10.3-Linux-x86_64.sh \
30 | 	&& ./Miniconda3-py37_4.10.3-Linux-x86_64.sh -b -p /bin/miniconda3 \
31 | 	&& /bin/miniconda3/bin/conda install --yes -c conda-forge -c r r-base=3.6 r-rcurl r-outliers r-reshape2 \
32 | 	r-ggplot2 r-car r-ggrepel r-tidyverse r-optparse


--------------------------------------------------------------------------------
/Docker/nanodoc-28_02_2022/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | 
 3 | MAINTAINER Mattia Furlan <mattia.furlan@iit.it>
 4 | 
 5 | RUN export DEBIAN_FRONTEND=noninteractive \
 6 |     && apt-get update -qq \
 7 |     && apt-get install -y \
 8 |     build-essential \
 9 |     wget \
10 |     unzip \
11 |     bzip2 \
12 |     git \
13 |     libidn11* \
14 |     nano \
15 |     less \
16 |     bc \
17 |  && apt-get clean \
18 |  && rm -rf /var/lib/apt/lists/*
19 | 
20 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py37_4.10.3-Linux-x86_64.sh \
21 |     && chmod 777 Miniconda3-py37_4.10.3-Linux-x86_64.sh \
22 |     && ./Miniconda3-py37_4.10.3-Linux-x86_64.sh -b -p /bin/miniconda3
23 | # --yes makes the install explicitly non-interactive, so the build never depends on a confirmation prompt.
24 | RUN /bin/miniconda3/bin/conda install --yes -c bioconda -c anaconda -c pytorch pip faiss-cpu python=3.7 numpy=1.18
25 | 
26 | RUN git clone https://github.com/uedaLabR/nanoDoc.git
27 | 
28 | RUN sed -i 's/Bio==0.0.6/Bio==1.3.3/' /nanoDoc/src/requirements.txt
29 | RUN sed -i 's/biopython==1.72/biopython==1.79/' /nanoDoc/src/requirements.txt
30 | RUN sed -i 's/faiss-gpu==1.5.3//' /nanoDoc/src/requirements.txt
31 | RUN sed -i 's/numpy==1.16.2//' /nanoDoc/src/requirements.txt
32 | RUN sed -i 's/tqdm==4.31.1//' /nanoDoc/src/requirements.txt
33 | 
34 | RUN /bin/miniconda3/bin/pip install -r /nanoDoc/src/requirements.txt
35 | 
36 | RUN mkdir /nanoDoc/weight5mer && mv /nanoDoc/weight5mer_1/* /nanoDoc/weight5mer && mv /nanoDoc/weight5mer_2/* /nanoDoc/weight5mer && rm -r /nanoDoc/weight5mer_*
37 | 
38 | RUN sed -i 's/nanoDocAnalysis.modCall(wight,param, ref, refraw,tgraw, output, chrom, chrom, start, end, strand, minreadlen)/\n    try:\n        nanoDocAnalysis.modCall(wight,param, ref, refraw,tgraw, output, chrom, chrom, start, end, strand, minreadlen)\n    except OSError:\n        pass/' /nanoDoc/src/nanoDoc.py
39 | 
40 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # NanOlympicsMod
 2 | 
 3 | NanOlympicsMod is a Nextflow pipeline for running multiple m6A detection tools based on Nanopore direct RNA sequencing data.
 4 | 
 5 | <p align="center">
 6 |   <img src="NanOlympicsMod_logo.png" alt="drawing" width="200" title="NanOlympicsMod_logo">
 7 | </p>
 8 | 
 9 | ## Repository content
10 | 
11 | * Docker: folder containing the Dockerfiles to assemble all the images required by the pipeline
12 | * Scripts: folder containing a bash script to run the nextflow pipeline, a set of R scripts for data post-processing and statistical analysis, and a python script to lift-over m6A peaks to SK1 reference genome
13 | * pipeline.nf: nextflow pipeline main script
14 | * pipeline.conf: nextflow pipeline configuration file
15 | * NanOlympicsMod_tutorial.pdf: tutorial describing how to add a tool to the NanOlympicsMod pipeline
16 |                                                                                          
17 | ## Getting started
18 | 
19 | **Prerequisites**
20 | 
21 | * [Nextflow](https://nf-co.re/usage/installation)
22 | * [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html)                                                                                  
23 |                                                                                    
24 | **Installation**
25 | 
26 | ```
27 | git clone https://github.com/mfurla/NanOlympicsMod.git
28 | cd NanOlympicsMod
29 | chmod 755 *
30 | ```
31 | 
32 | ## Usage
33 | 
34 | The NanOlympicsMod pipeline requires you to open the pipeline.conf configuration file and set the desired options.
35 | ```
36 |    Usage:
37 |    nextflow -c pipeline.conf run pipeline.nf --samples="/path/to/samples.txt" --resultsDir="/path/to/resultsDir" 
38 |    Mandatory arguments which may be specified in the pipeline.conf file
39 | 
40 | --samples                                                Path to the tab-separated sample file including sample name, condition and path to base-called fast5 folder
41 | --test_condition                                         Condition that we are interested in profiling (e.g. 'WT')
42 | --resultsDir                                             Path to a folder where to store results
43 | --fast5_slot                                             FAST5 slot containing the basecalled bases
44 | --fast5_slot_id                                          FAST5 slot containing the basecalled bases (redundant)
45 | --tombo_slot                                             FAST5 slot containing the resquiggled data
46 | --tombo_subslot                                          FAST5 slot containing the resquiggled data
47 | --transcriptome_fasta                                    Path to the transcriptome fasta file
48 | --transcriptome_fai                                      Path to the transcriptome fasta index file
49 | --genome_fasta                                           Path to the genome fasta file
50 | --genome_fai                                             Path to the genome fasta index file
51 | --genes2transcripts                                      Path to gene-to-transcripts file for Nanom6A
52 | --transcriptomebed                                       Path to transcripts bed12 file
53 | --genesbed                                               Path to genes bed file
54 | --gtf                                                    Path to genome annotation gtf file
55 | --nanom6AP                                               nanom6A probability thresholds for PR curve plotting
56 | --yanocompFDR                                            yanocomp FDR threshold
57 | --differrFDR                                             differr FDR threshold
58 | --drummerPval                                            drummer Pvalue threshold
59 | --epinanoErrorSumErr                                     epinanoError threshold sum of errors
60 | --epinanoErrorResiduals                                  epinanoError threshold residuals
61 | --postprocessingScript                                   Path to postprocessing R script
62 | --statisticalAnalysis                                    Path to statistical_analysis R script
63 | --binLength                                              Size of windows for genome binning
64 | --threshold                                              Set of thresholds to use for the filtering of m6A sites (choose between 'default' and 'relaxed') 
65 | --peaksfile                                              Path to bed file with set of m6A gold-standard peaks
66 | ```
67 | 
68 | ## Citation
69 | 
70 | If this tool is useful for your work, please consider citing our [manuscript](https://academic.oup.com/bib/article/25/2/bbae001/7590315).
71 | 
72 | Maestri S, Furlan M, Mulroney L, et al. Benchmarking of computational methods for m6A profiling with Nanopore direct RNA sequencing. Brief Bioinform. 2024;25(2):bbae001. doi:10.1093/bib/bbae001
73 | 


--------------------------------------------------------------------------------
/Scripts/convert_m6a_to_sk1.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import argparse, sys, collections
  4 | import numpy as np
  5 | 
  6 | ###############################################################################
  7 | def parse_args(args):
  8 |     '''
  9 |     Parses the command line options (--bed, --annotation, --m6a, --mazter, --outfile) and returns the argparse namespace; NOTE: `args` is not forwarded to argparse, which therefore reads sys.argv itself
 10 |     '''
 11 |     parser = argparse.ArgumentParser(description ='Performs lift over from m6A-seq and MAZTER-seq reference to sk1 reference')
 12 | 
 13 |     parser.add_argument('--bed', '-b', type = str, help = 'input sk1 bed12 file')
 14 | 
 15 |     parser.add_argument('--annotation', '-a', type = str, help = 'Schwartz transcriptome annotation table file (Supp Table 5)')
 16 | 
 17 |     parser.add_argument('--m6a', '-m', type = str, help = 'Schwartz m6a sites table file (Supp Table 1)')
 18 | 
 19 |     parser.add_argument('--mazter', '-i', type = str, help = 'Schwartz mazter m6a sites table file (Supp Table 4)')
 20 | 
 21 |     parser.add_argument('--outfile', '-o', type = str, help = 'output bed file')
 22 | 
 23 |     return parser.parse_args()
 24 | ###############################################################################
 25 | 
 26 | ###############################################################################
 27 | class Annotation:  #Plain record holding the annotation fields of one gene
 28 |     def __init__(self, name, chrom, start, stop, strand):  #Stores every argument unchanged as an attribute
 29 |         self.name = name  #gene symbol when built by parse_schwartz_annotation
 30 |         self.start = start  #CDS start from read_schwartz_annotation_tsv (int, or NaN when unparsable)
 31 |         self.stop = stop  #CDS end from read_schwartz_annotation_tsv (int, or NaN when unparsable)
 32 |         self.strand = strand  #strand column of the annotation table, kept as text
 33 |         self.chrom = chrom  #chromosome name column of the annotation table
 34 | ###############################################################################
 35 | 
 36 | ###############################################################################
 37 | def parse_schwartz_annotation(infile):
 38 |     '''Returns a mapping gene_symbol -> Annotation built from the rows of read_schwartz_annotation_tsv(infile); a repeated gene_symbol keeps its last row
 39 |     '''
 40 |     ref_coords = collections.defaultdict()  #no default factory is given, so a missing key still raises KeyError
 41 |     for gene_symbol, chrom, start, stop, strand in read_schwartz_annotation_tsv(infile):
 42 |         ref_coords[gene_symbol] = Annotation(gene_symbol, chrom, start, stop, strand)
 43 | 
 44 |     return ref_coords
 45 | ###############################################################################
 46 | 
 47 | ###############################################################################
 48 | def read_schwartz_annotation_tsv(infile):
 49 |     '''
 50 |     Generator script used to parse the tsv version of Supp Table 5 from Schwartz et al Cell 2013
 51 |     input : tsv file
 52 |     yeilds the gene_symbol, chr name, CDS start coordinate, CDS end coordinate, and the strand
 53 |     for each annotated gene in the table
 54 |     '''
 55 |     with open(infile, 'r') as tsv:
 56 |         #Reads the header line
 57 |         line = tsv.readline()
 58 |         #Begins reading the data lines in the tsv
 59 |         for line in tsv:
 60 |             line = line.strip().split('\t')
 61 |             gene_symbol = line[0].strip()
 62 |             chrom = line[1].strip()
 63 |             strand = line[6].strip()
 64 |             try:
 65 |                 cds_start = int(line[4].strip())
 66 |             except:
 67 |                 cds_start = float('NaN')
 68 | 
 69 |             try:
 70 |                 cds_end = int(line[5].strip())
 71 |             except:
 72 |                 cds_end = float('NaN')
 73 | 
 74 |             yield gene_symbol, chrom, cds_start, cds_end, strand
 75 | ###############################################################################
 76 | 
 77 | ##############################################################################
 78 | class Sites:  #Unified record for one m6A site, whichever input table it came from
 79 |     def __init__(self, name, chrom, site):  #Stores every argument unchanged as an attribute
 80 |         self.name = name  #gene symbol when built by parse_m6a_tsv
 81 |         self.chrom = chrom  #chromosome name column of the input table
 82 |         self.site = site  #site coordinate (int, or NaN when the table value was unparsable)
 83 | ###############################################################################
 84 | 
 85 | ###############################################################################
 86 | def parse_m6a_tsv(infile, parse_type=''):
 87 |     '''
 88 |     A switch function that determines which of the two m6A site files to read (m6A-seq or MAZTER-seq)
 89 |     and uses the correct reading function. Then converts the site information from either file type into
 90 |     a single unified site object so that the data can be used in the same way for both file types
 91 |     '''
 92 |     m6a_coords = collections.defaultdict(list)
 93 | 
 94 |     if parse_type == 'sites':
 95 |         for gene_symbol, chrom, peak in read_m6a_tsv(infile):
 96 |             m6a_coords[gene_symbol].append(Sites(gene_symbol, chrom, peak))
 97 | 
 98 |     elif parse_type == 'mazter':
 99 |         for gene_symbol, chrom, group, site in read_mazter_tsv(infile):
100 |             #Selects MAZTER-seq sites that have a confidence group greater than 1
101 |             #This was done as in Leger et al Nature Com 2021
102 |             if group > 1:
103 |                 m6a_coords[gene_symbol].append(Sites(gene_symbol, chrom, site))
104 |     else:
105 |         sys.stderr.write("No parse type was specified")
106 | 
107 |     return m6a_coords
108 | ###############################################################################
109 | 
110 | ###############################################################################
111 | def read_m6a_tsv(infile):
112 |     '''
113 |     Generator script used to parse the tsv version of Supp Table 1 from Schwartz et al Cell 2013
114 |     input : tsv file
115 |     yeilds the gene_symbol, chr name, and the m6A peak site for each site in the table
116 |     '''
117 |     with open(infile, 'r') as tsv:
118 |         line = tsv.readline()
119 |         for line in tsv:
120 |             line = line.strip().split('\t')
121 |             gene_symbol = line[0].strip()
122 |             chrom = line[2].strip()
123 | 
124 |             try:
125 |                 peak = int(line[3].strip())
126 |             except:
127 |                 peak = float('NaN')
128 | 
129 |             yield gene_symbol, chrom, peak
130 | ###############################################################################
131 | 
132 | ###############################################################################
133 | def read_mazter_tsv(infile):
134 |     '''
135 |     Generator script used to parse the tsv version of Supp Table 4 from Garcia-Campos et al Cell 2019
136 |     input : tsv file
137 |     yeilds the gene_symbol, chr name, confidence group, and the m6A peak site for each site in the table
138 |     '''
139 |     with open(infile, 'r') as tsv:
140 |         line = tsv.readline()
141 |         for line in tsv:
142 |             line = line.strip().split('\t')
143 |             chrom = line[1].strip()
144 |             gene_symbol = line[23].strip()
145 |             try:
146 |                 confidence_group = int(line[22].strip())
147 |             except:
148 |                 confidence_group = float('NaN')
149 | 
150 |             try:
151 |                 peak = int(line[2].strip())
152 |             except:
153 |                 peak = float('NaN')
154 | 
155 |             yield gene_symbol, chrom, confidence_group, peak
156 | ###############################################################################
157 | 
158 | ###############################################################################
def calc_indexes(site_coords, annotations):
    '''
    Expresses every m6A site as an offset from the start of its gene annotation,
    so the site can later be re-projected onto another reference (sk1 MV0).

    site_coords : mapping gene -> iterable of records exposing a .site attribute
    annotations : mapping gene -> record exposing a .start attribute
    returns     : defaultdict(list) mapping gene -> [site - start, ...]; only genes
                  present in both inputs and having at least one site get an entry
    '''
    offsets = collections.defaultdict(list)

    # Only genes known to both tables can be converted.
    shared_genes = set(site_coords) & set(annotations)

    for name in shared_genes:
        reference = annotations[name]
        for entry in site_coords[name]:
            offsets[name].append(entry.site - reference.start)

    return offsets
179 | ###############################################################################
180 | 
181 | ###############################################################################
def convert_m6a_coords(m6a_indexes, mazter_indexes, inbed, outfile):
    '''
    Projects per-gene site offsets onto the features of a BED file and writes one
    single-base BED6 record per site.

    m6a_indexes    : mapping gene -> list of offsets for the m6A-seq sites
    mazter_indexes : mapping gene -> list of offsets for the MAZTER-seq sites
    inbed          : tab-separated BED file with at least 6 columns; the gene is the
                     name column (4th) with everything after its last '_' removed
    outfile        : path of the BED file to write (overwritten if it exists)

    For every input feature and every offset of its gene, peak = start + offset and
    the record "chrom, peak-1, peak, <gene>_<source>, score, strand" is written,
    where <source> is "m6a-seq" or "mazter". For each input line all m6A-seq
    records are written before the MAZTER-seq ones. Blank lines are skipped.
    '''
    # (offset table, label appended to the gene name), in output order.
    sources = ((m6a_indexes, 'm6a-seq'), (mazter_indexes, 'mazter'))

    with open(outfile, 'w') as out, open(inbed) as bed:
        for line in bed:
            line = line.strip()
            if not line:
                # e.g. an empty line at the end of the file: nothing to convert.
                continue

            fields = line.split('\t')
            chrom = fields[0].strip()
            start = int(fields[1])
            gene = fields[3].strip().rsplit('_', 1)[0]
            score = fields[4].strip()
            strand = fields[5].strip()

            for indexes, label in sources:
                if gene not in indexes:
                    continue
                # NOTE: a MAZTER-seq site that coincides with an m6A-seq site is
                # written twice (once per label). To drop such duplicates, skip
                # mazter offsets that are also listed in m6a_indexes for the gene.
                for site in indexes[gene]:
                    peak = start + site
                    m6a_start = str(peak - 1)
                    m6a_stop = str(peak)
                    out.write(f'{chrom}\t{m6a_start}\t{m6a_stop}\t{gene}_{label}\t{score}\t{strand}\n')
214 | ###############################################################################
215 | 
216 | ###############################################################################
def main(args):
    '''
    Command-line entry point: parses the options, loads the annotation and the two
    site tables, turns the sites into per-gene offsets and writes the converted BED.

    args : the raw argument vector (sys.argv), handed to parse_args unchanged
    '''
    opts = parse_args(args)

    # Inputs: one annotation table and two site tables (labelled "sites" / "mazter").
    reference = parse_schwartz_annotation(opts.annotation)
    m6a_seq_sites = parse_m6a_tsv(opts.m6a, "sites")
    mazter_seq_sites = parse_m6a_tsv(opts.mazter, 'mazter')

    # Offsets of each site relative to its gene annotation, then the projection
    # onto the features listed in the input BED file.
    convert_m6a_coords(
        calc_indexes(m6a_seq_sites, reference),
        calc_indexes(mazter_seq_sites, reference),
        opts.bed,
        opts.outfile,
    )
229 | ###############################################################################
230 | 
# Run the conversion only when executed as a script, then leave the
# interpreter through SystemExit (no exit code given).
if __name__ == "__main__":
    main(sys.argv)
    sys.exit()
234 | 
235 | 


--------------------------------------------------------------------------------
/pipeline.conf:
--------------------------------------------------------------------------------
 // User-editable pipeline parameters. Every "/path/to/..." value is a placeholder
 // that has to be replaced before running the pipeline.
 params{
	// Path to the sample description file
 	samples = "/path/to/samples.txt"

 	// This must match one of the conditions in the samples file
	test_condition = "WT"

	// Path to a folder where to store results
	resultsDir = "/path/to/resultsDir"

	// FAST5 slot containing the basecalled bases.
	// WARNING: fast5_slot_id is redundant - it repeats the numeric suffix of fast5_slot,
	// so the two values have to be kept in sync.
	fast5_slot = "Basecall_1D_000"
	fast5_slot_id = "000"

	// FAST5 slot containing the resquiggle data
	tombo_slot = "RawGenomeCorrected_000"
	tombo_subslot = "BaseCalled_template"

	// Path to the transcriptome fasta
	transcriptome_fasta = "/path/to/transcriptome.fa"
	
	// Path to the transcriptome fasta index.
	// WARNING: the index must already exist (TODO: create it in a dedicated process)
	transcriptome_fai = "/path/to/transcriptome.fa.fai"

	// Path to the genome fasta
	genome_fasta = "/path/to/genome.fa"
	
	// Path to the genome fasta index.
	// WARNING: the index must already exist (TODO: create it in a dedicated process)
	genome_fai = "/path/to/genome.fa.fai"

	// Gene to transcripts file for Nanom6A.
	// WARNING: the file must already exist (TODO: create it in a dedicated process)
	genes2transcripts = "/path/to/file.txt"

	// Bed files
	transcriptomebed = "/path/to/transcriptome.bed"
	genesbed = "/path/to/genes.bed"

	// Gtf file
	gtf = "/path/to/file.gtf"

	// nanom6A probabilities (space-separated list in a single string)
	nanom6AP = "0.5 0.6 0.7 0.8 0.9 0.99"

	// yanocomp FDR threshold
	yanocompFDR = 1

	// differr FDR threshold
	differrFDR = 1

	// drummer Pvalue threshold
	drummerPval = 1

	// epinanoError threshold sum of errors
	epinanoErrorSumErr = 0

	// epinanoError threshold residuals
	epinanoErrorResiduals = 3
	
	// Path to post-processing R script
	postprocessingScript = "/path/to/postprocessing.R"
	
	// Path to statistical analysis R script
        statisticalAnalysis = "/path/to/statistical_analysis.R"

	// Setting of threshold that each tool is going to use for the detection of m6A site
	threshold = "relaxed"
	
	// Bin length
	binLength = 50

	// File with set of m6A positive sites
	peaksfile = "/path/to/file.bed"

	// Flags to select which process to run.
	// The same flags are read by the cpus/memory closures in the process scope below:
	// a process whose flag is false only requests 1 CPU and 1 GB.
	// nanodoc is the only step disabled by default.
	multi2single = true
	fastq = true
	minimap2 = true
	minimap2Merge = true
	tombo1 = true
	tombo2 = true
	tombo3 = true
	nanom6a = true
	differr = true
	eligos = true
	mines = true
	dena = true
	epinanoSVM = true
	epinanoError = true
	nanodoc = false
	drummer = true
	nanopolish1 = true
	xpore = true
	nanocompore1 = true
	nanocompore2 = true
	m6anet1 = true
	m6anet2 = true
	yanocomp1 = true
	yanocomp2 = true
	postprocessing = true
}
101 | 
// Container runtime: the process containers are executed through Singularity.
singularity {
	enabled = true
	// Host paths are not mounted automatically; the directories a task needs are
	// bound explicitly through process.containerOptions ('--bind ...').
	autoMounts = false
	// Placeholder: folder where the Singularity images are stored
	cacheDir = "/path/to/singularity/cache/"
}
107 | 
// Nextflow Tower monitoring; disabled by default.
// endpoint and accessToken hold placeholder values and have to be replaced
// before setting enabled = true.
tower {
	enabled = false
	endpoint = '-'
	accessToken = 'nextflowTowerToken'
}
113 | 
// Execution settings for the pipeline processes.
// The first five settings are defaults for every process; each withName selector
// then sets the container and the resources of a single process.
//
// All selectors follow the same pattern:
//   - cpus / memory are closures: the full allocation is requested only when the
//     matching params flag is true, otherwise 1 CPU and 1 GB;
//   - the memory request grows by 2 GB for every retry (task.attempt - 1);
//   - a task is retried (at most maxRetries = 3 times) only when it exits with
//     status 130; any other failure terminates the run.
process{
	cpus = 1
	executor = 'pbspro'
	queue = 'workq'
	// NOTE(review): Nextflow documents perJobMemLimit as an option of the executor
	// scope - confirm that it has any effect when set in the process scope.
	perJobMemLimit = true
	// Placeholder bind path: must expose the work directory inside the containers
	// (singularity.autoMounts is false).
        containerOptions = '--bind /path/to/work/dir/:/path/to/work/dir/'
	withName:multi2single{
		container = 'bproject/ont_fast5_api:v1'
		cpus = { params.multi2single ? 6 : 1 }
		memory = { params.multi2single ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:fastq{
		container = 'biocontainers/poretools:v0.6.0dfsg-3-deb_cv1'
		cpus = { params.fastq ? 6 : 1 }
		memory = { params.fastq ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:minimap2{
		container = 'bproject/minimap2:v1'
		cpus = { params.minimap2 ? 6 : 1 }
		memory = { params.minimap2 ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:minimap2Merge{
		container = 'bproject/minimap2:v1'
		cpus = { params.minimap2Merge ? 6 : 1 }
		memory = { params.minimap2Merge ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:tombo1{
		container = 'bproject/tombo:v4'
		cpus = { params.tombo1 ? 5 : 1 }
		memory = { params.tombo1 ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:tombo2{
		container = 'bproject/tombo:v4'
		cpus = { params.tombo2 ? 6 : 1 }
		memory = { params.tombo2 ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:tombo3{
		container = 'bproject/tombo:v4'
		cpus = { params.tombo3 ? 6 : 1 }
		memory = { params.tombo3 ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:nanom6a{
		container = 'bproject/nanom6a:v2'
		cpus = { params.nanom6a ? 6 : 1 }
		memory = { params.nanom6a ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:differr{
		container = 'bproject/differr:v1'
		cpus = { params.differr ? 6 : 1 }
		memory = { params.differr ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:eligos{
		container = 'bproject/eligos:v1'
		cpus = { params.eligos ? 6 : 1 }
		memory = { params.eligos ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:mines{
		container = 'bproject/mines:v1'
		cpus = { params.mines ? 6 : 1 }
		memory = { params.mines ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:dena{
		container = 'bproject/dena:v1'
		cpus = { params.dena ? 6 : 1 }
		memory = { params.dena ? 20.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:epinanoSVM{
		container = 'bproject/epinano:v1'
		cpus = { params.epinanoSVM ? 6 : 1 }
		memory = { params.epinanoSVM ? 15.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:epinanoError{
		container = 'bproject/epinano:v1'
		cpus = { params.epinanoError ? 6 : 1 }
		memory = { params.epinanoError ? 15.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	// NOTE(review): nanodoc is the only selector without errorStrategy/maxRetries,
	// so no retry policy is configured for it here - confirm this is intentional.
	withName:nanodoc{
		container = 'bproject/nanodoc:v2'
		cpus = { params.nanodoc ? 6 : 1 }
		memory = { params.nanodoc ? 20.GB + (2.GB * (task.attempt-1)) : 1.GB }
	}
	withName:drummer{
		container = 'bproject/drummer:v1'
		cpus = { params.drummer ? 3 : 1 }
		memory = { params.drummer ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:nanopolish1{
		container = 'bproject/nanopolish:v1'
		cpus = { params.nanopolish1 ? 6 : 1 }
		memory = { params.nanopolish1 ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	// xpore1 and xpore2 are both driven by the single params.xpore flag.
	withName:xpore1{
		container = 'bproject/xpore:v1'
		cpus = { params.xpore ? 3 : 1 }
		memory = { params.xpore ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:xpore2{
		container = 'bproject/xpore:v1'
		cpus = { params.xpore ? 3 : 1 }
		memory = { params.xpore ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:m6anet1{
		container = 'bproject/m6anet:v1'
		cpus = { params.m6anet1 ? 3 : 1 }
		memory = { params.m6anet1 ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:m6anet2{
		container = 'bproject/m6anet:v1'
		cpus = { params.m6anet2 ? 3 : 1 }
		memory = { params.m6anet2 ? 5.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:nanocompore1{
		container = 'bproject/nanocompore:v1'
		cpus = { params.nanocompore1 ? 7 : 1 }
		memory = { params.nanocompore1 ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:nanocompore2{
		container = 'bproject/nanocompore:v1'
		cpus = { params.nanocompore2 ? 7 : 1 }
		memory = { params.nanocompore2 ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:yanocomp1{
		container = 'bproject/yanocomp:v1'
		cpus = { params.yanocomp1 ? 3 : 1 }
		memory = { params.yanocomp1 ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:yanocomp2{
		container = 'bproject/yanocomp:v1'
		cpus = { params.yanocomp2 ? 3 : 1 }
		memory = { params.yanocomp2 ? 20.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}
	withName:postprocessing{
		container = 'bproject/postprocessing:v3'
		cpus = { params.postprocessing ? 3 : 1 }
		memory = { params.postprocessing ? 10.GB + (2.GB * (task.attempt-1)) : 1.GB }
		errorStrategy = { task.exitStatus == 130 ? 'retry' : 'terminate' }
		maxRetries = 3
	}

}
302 | 


--------------------------------------------------------------------------------
/Scripts/statistical_analysis.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | ### load input variables ###
  3 | args = commandArgs(trailingOnly=TRUE)
  4 | 
  5 | for(v in args)
  6 | {
  7 |   vTmp <- strsplit(v,"=")[[1]]
  8 |   assign(vTmp[[1]],vTmp[[2]])
  9 | }
 10 | 
 11 | #avoid scientific notation
 12 | options(scipen = 100)
 13 | 
 14 | # Libraries
 15 | library(GenomicRanges)
 16 | library(GenomicAlignments)
 17 | library(GenomicFeatures)
 18 | library(parallel)
 19 | library(PRROC)
 20 | library(stringr)
 21 | library(Biostrings)
 22 | library(pheatmap)
 23 | 
## 1) Load all the output files from the output directory
# bed_folder is not defined in this script: it is expected to arrive as a
# bed_folder=<dir> command-line argument (see the name=value parsing above).
# NOTE: "\\.bed" is an unanchored regular expression, so every file whose name
# contains ".bed" is listed, not only the names that end with it.
files <- list.files(bed_folder, full.names = TRUE, pattern = "\\.bed") # bed_folder parameter from outside

# Regular-expression alternations of tool names, used to decide how several hits
# falling in the same bin are summarised.
listmax <- paste0(c("DENA", "EpiNano-Error", "EpiNano-SVM", "NanoDoc", "m6Anet"), collapse = "|") # for these tools we need to maximize the filtering parameter when there is more than one hit in a bin
listmin <- paste0(c("DiffErr", "DRUMMER", "Yanocomp", "Nanocompore", "ELIGOS", "xPore", "Tombo"), collapse = "|") # for these tools we need to minimize the filtering parameter

# Default threshold of each tool; values and names are paired by position.
threshold_default <- c(0.1, 0.05, 0.05, 0.05, 0.01, 0.0001, 0.1, 0.5, 0.05, 0.02, 0.05, 0.9)
names(threshold_default) <- c("DENA", "DiffErr", "DRUMMER", "Yanocomp", "Nanocompore", "ELIGOS", 
                              "EpiNano-Error", "EpiNano-SVM", "xPore", "NanoDoc", "Tombo", "m6Anet")

# Locate every RRACH motif in the genome (genomefile parameter from outside).
# "DGTYY" is the IUPAC reverse complement of "RRACH", so matching it on the
# forward sequence gives the motif occurrences of the minus strand.
# NOTE(review): fixed = "subject" presumably lets the IUPAC codes of the pattern
# act as ambiguities while the genome letters are matched literally - confirm
# against the Biostrings documentation.
chrs <- readDNAStringSet(genomefile, format="fasta")
RRACH_plus <- GRanges(vmatchPattern(pattern = "RRACH", subject = chrs, fixed = "subject"), strand = "+")
RRACH_minus <- GRanges(vmatchPattern(pattern = "DGTYY", subject = chrs, fixed = "subject"), strand = "-")
RRACH <- c(RRACH_plus, RRACH_minus)
# Flatten the motif hits into a data frame with BED-like columns.
RRACH_bed <- cbind(as.data.frame(seqnames(RRACH)), start(RRACH), end(RRACH), as.data.frame(strand(RRACH)))
colnames(RRACH_bed) <- c("chr", "start", "end", "strand")
#write.table(RRACH_bed, file = "RRACH_coords.bed", sep = "\t", quote = F, row.names = F)
 41 | 
## 2) Binning the genome in windows of length w
w <- as.numeric(binLength) # binLength parameter from outside
genesBed <- read.table(genesbed, sep = "\t") # genesbed parameter from outside
colnames(genesBed) <- c("chr","start","end","name","score","strand")

# One GRanges of bins per gene (row of genesBed), computed in parallel.
# Bins are named "<gene name>-B<index>".
genesBinsList <- mclapply(1:nrow(genesBed),function(k) 
{
  i <- genesBed[k,]
  
  # lTmp: gene length; nTmp: number of complete bins; swTmp: bases left over
  lTmp <- (i$end - i$start)+1
  nTmp <- floor(lTmp/w)
  
  swTmp <- lTmp - (nTmp*w)
  
  if(lTmp<=w)
  {
    # Gene not longer than one window: a single bin spanning the whole gene
    grangeTmp <- GRanges(seqnames=i$chr,ranges=IRanges(i$start,i$end),strand=i$strand)
    names(grangeTmp) <- paste0(i$name,"-B1")
    grangeTmp
  }else{
    # breaksTmp holds the bin start coordinates plus a final closing break:
    # bin j spans breaksTmp[j] .. breaksTmp[j+1]-1.
    if(as.character(i$strand)=="+")
    {
      # "+" strand: the short leftover bin is placed at the low-coordinate end
      # NOTE(review): with these breaks the last bin ends at lTmp-1 (gene-relative)
      # and, when swTmp is 0, the first bin starts one base before i$start; the
      # other branch starts exactly at i$start. Looks like a one-base shift on
      # this strand - confirm whether it is intended.
      if(swTmp!=0)
      {
        breaksTmp <- c(1,seq(from=swTmp,to=lTmp,by=w))+(i$start-1)
      }else{
        breaksTmp <- seq(from=swTmp,to=lTmp,by=w)+(i$start-1)
      }
      grangeTmp <- GRanges(seqnames=i$chr,ranges=IRanges(breaksTmp[-length(breaksTmp)],breaksTmp[-1]-1),strand=i$strand)
      names(grangeTmp) <- paste0(i$name,"-B",seq_along(breaksTmp[-1]-1))
      grangeTmp
    }else{
      # Any other strand: complete bins start at i$start, the short leftover bin
      # is placed at the high-coordinate end
      if(swTmp!=0)
      {
        breaksTmp <- c(seq(from=1,to=(lTmp-swTmp+1),by=w),lTmp)+(i$start-1)
      }else{
        breaksTmp <- seq(from=1,to=(lTmp-swTmp+1),by=w)+(i$start-1)
      }
      grangeTmp <- GRanges(seqnames=i$chr,ranges=IRanges(breaksTmp[-length(breaksTmp)],breaksTmp[-1]-1),strand=i$strand)
      names(grangeTmp) <- paste0(i$name,"-B",seq_along(breaksTmp[-1]-1))
      grangeTmp
    }
  }
}, mc.cores=as.numeric(mccores)) # mccores parameter from outside

# Merge the per-gene results into a single GRanges
genesBins <- unlist(as(genesBinsList,"GRangesList"))
genesBins <- genesBins[which(width(genesBins) == w), ] # Keep only the bins whose width is exactly w (leftover bins and short genes are dropped)
 89 | 
###### Motif positions and reference peaks as sorted, de-duplicated GRanges
RRACH_granges <- unique(sort(makeGRangesFromDataFrame(RRACH_bed)))
# Peaks
# The first line of the file is read as a header and the column names are then
# overwritten, so the file has to provide exactly these six columns.
peaks_bed <- read.table(peaks, header = TRUE, sep = "\t") # gold-standard peaks file parameter from outside
colnames(peaks_bed) <- c("chr", "start", "end", "desc", "score", "strand")
peaks_granges <- unique(sort(makeGRangesFromDataFrame(peaks_bed)))
###### Find bins with RRACH motifs
# minoverlap = min(5, w): 5 is the length of the RRACH motif, so (for w >= 5)
# the whole motif has to lie inside the bin.
RRACH_overlap <- findOverlaps(query = RRACH_granges, subject = genesBins, minoverlap = min(5, w), type = "any")
genesBins_RRACH <- unique(sort(genesBins[subjectHits(RRACH_overlap)]))
# Peaks: RRACH-containing bins that share at least one base with a reference peak
RRACH_overlap_genesBins_peaks <-  findOverlaps(query = genesBins_RRACH, subject = peaks_granges, minoverlap = 1, type = "any")
peaks_RRACH_granges <- genesBins_RRACH[sort(unique(queryHits(RRACH_overlap_genesBins_peaks)))]
102 | 
##### Find bins with high coverage
# Optional step: it only runs when a highcov_bed_file variable exists (it is not
# defined in this script, so it has to come from the command-line arguments).
# Without it highcov_GRanges is NULL and none of the *_highcov* objects below
# are created.

if (!exists("highcov_bed_file")) {
  highcov_GRanges <- NULL
} else {
  # Header-less BED-like table: columns 1-3 = chr/start/end, 4 = name, 6 = strand
  highcov_bed <- read.table(highcov_bed_file, sep = "\t", header = FALSE)
  highcov_GRanges <- GRanges(seqnames = highcov_bed[, 1],
                             ranges = IRanges(start = highcov_bed[, 2], end = highcov_bed[, 3]),
                             strand = highcov_bed[, 6])
  names(highcov_GRanges) <- highcov_bed[, 4]
  # Bins sharing at least one base with a high-coverage region ...
  highcov_overlap <- findOverlaps(query = highcov_GRanges, subject = genesBins, minoverlap = 1, type = "any")
  genesBins_highcov <- unique(sort(genesBins[subjectHits(highcov_overlap)]))
  # ... and, among those, the bins that also overlap a reference peak
  overlap_genesBins_highcov_peaks <-  findOverlaps(query = genesBins_highcov, subject = peaks_granges, minoverlap = 1, type = "any")
  peaks_highcov_granges <- genesBins_highcov[sort(unique(queryHits(overlap_genesBins_highcov_peaks)))]
  
  ###### Find high coverage bins with RRACH motifs
  # Same motif criterion as for all bins: min(5, w) bases of the 5-base motif
  # have to overlap the bin.
  highcov_RRACH_overlap <- findOverlaps(query = RRACH_granges, subject = genesBins_highcov, minoverlap = min(5, w), type = "any")
  genesBins_highcov_RRACH <- unique(sort(genesBins_highcov[subjectHits(highcov_RRACH_overlap)]))
  # High-coverage, RRACH-containing bins that overlap a reference peak
  genesBins_highcov_RRACH_peaks_overlap <-  findOverlaps(query = genesBins_highcov_RRACH, subject = peaks_granges, minoverlap = 1, type = "any")
  peaks_highcov_RRACH_granges <- genesBins_highcov_RRACH[sort(unique(queryHits(genesBins_highcov_RRACH_peaks_overlap)))]
}
124 | 
125 | Run_statistical_analysis <- function(genesBins_par, peaks_par, files_par, notes = "", w) {
126 |   tools <- basename(files_par)
127 |   tools[grep(pattern = "dena", x = tools)] <- "DENA"
128 |   tools[grep(pattern = "drummer", x = tools)] <- "DRUMMER"
129 |   tools[grep(pattern = "differr", x = tools)] <- "DiffErr"
130 |   tools[grep(pattern = "eligos", x = tools)] <- "ELIGOS"
131 |   tools[grep(pattern = "epinanoErr", x = tools)] <- "EpiNano-Error"
132 |   tools[grep(pattern = "epinanoSvm", x = tools)] <- "EpiNano-SVM"
133 |   tools[grep(pattern = "m6anet", x = tools)] <- "m6Anet"
134 |   tools[grep(pattern = "mines", x = tools)] <- "MINES"
135 |   tools[grep(pattern = "nanocompore", x = tools)] <- "Nanocompore"
136 |   tools[grep(pattern = "nanom6a", x = tools)] <- gsub(x = gsub(pattern = "nanom6a_output\\.bed", replacement = "Nanom6A", x = tools[grep(pattern = "nanom6a", x = tools)]), pattern = "\\.tsv\\.bed", replacement = "")
137 |   tools[grep(pattern = "tomboComparison", x = tools)] <- "Tombo"
138 |   tools[grep(pattern = "xpore", x = tools)] <- "xPore"
139 |   tools[grep(pattern = "yanocomp", x = tools)] <- "Yanocomp"
140 |   tools[grep(pattern = "nanodoc", x = tools)] <- "NanoDoc"
141 |   # Build matrix of zeros
142 |   hitsMatrix <- matrix(c(0), nrow = length(genesBins_par), ncol = length(files_par) + 1)
143 |   colnames(hitsMatrix) <- c("Reference_set", tools)
144 |   
145 |   row.names(hitsMatrix) <- c(1:nrow(hitsMatrix))
146 |   
147 |   peaks_overlap <- findOverlaps(query = peaks_par, subject = genesBins_par, minoverlap = 1, type = "any")
148 |   hitsMatrix[unique(subjectHits(peaks_overlap)), "Reference_set"] <- 1
149 |   overlapMatrix <- hitsMatrix
150 |   ind_nodef <- setdiff(grep(x = colnames(overlapMatrix), pattern = "Nanom6A"), grep(x = colnames(overlapMatrix), pattern = "Nanom6A_ratio\\.0\\.5"))
151 |   if (length(ind_nodef) > 0) {
152 |     overlapMatrix <- overlapMatrix[, -ind_nodef]
153 |   }
154 |   
155 |   matrix_nanom6A <- hitsMatrix[,"Reference_set"]
156 |   names_nanom6A <- c()
157 |   
158 |   Performances <- list()
159 |   listPRcurves <- list()
160 |   
161 |   for (y in files) {
162 |     ind <- which(files == y)
163 |     x <- tools[ind]
164 |     
165 |     cat(sprintf("Processing file: %s\n", y))
166 |     recall <- c()
167 |     precision <- c()
168 |     # Extraction of bed file + conversion to granges
169 |     bed_file <- read.table(y, header = T, sep = "\t")
170 |     granges <- makeGRangesFromDataFrame(bed_file, keep.extra.columns = T)
171 |     # Overlap between m6A detected site of each tool and genome binned
172 |     overlap <- as.matrix(findOverlaps(query = granges, subject = genesBins_par, minoverlap = 1, type = "any"))
173 |     if (grepl(x, pattern = paste0(c("DENA","DRUMMER","DiffErr","Yanocomp","Nanocompore","ELIGOS","EpiNano-Error",
174 |                                     "EpiNano-SVM","xPore","NanoDoc","Tombo", "m6Anet"), collapse = "|"))) {
175 |       # Add column of filtering parameter
176 |       filtering_parameter <- bed_file[overlap[,"queryHits"] , 6]
177 |       overlap_w_parameter <- cbind(overlap, filtering_parameter)
178 |       default <- unname(threshold_default[grep(x, pattern = paste0(c("DENA","DRUMMER","DiffErr","Yanocomp","Nanocompore","ELIGOS","EpiNano-Error","EpiNano-SVM",
179 |                                                                      "xPore","NanoDoc","Tombo", "m6Anet"), collapse = "|"), value = T)])
180 |       # Recognize from the name of the tool if we need to keep the maximum or minimum value (when there are more hits in a single bin)
181 |       if(grepl(x, pattern = listmax)) { 
182 |         score <- sapply(split(overlap_w_parameter[,3], overlap_w_parameter[,2]), max)
183 |         default_thr <- default
184 |         hitsMatrix[, x] <- rep(0, length(genesBins_par))
185 |         overlapMatrix[, x] <- rep(0, length(genesBins_par))
186 |       } else {
187 |         score <- -1*sapply(split(overlap_w_parameter[,3], overlap_w_parameter[,2]), min)
188 |         default_thr <- - default
189 |         hitsMatrix[, x] <- rep(-1, length(genesBins_par))
190 |         overlapMatrix[, x] <- rep(0, length(genesBins_par))
191 |       }
192 |       
193 |       #assign score to hitsMatrix
194 |       hitsMatrix[names(score), x] <- score
195 |       
196 |       #assign 0 or 1 value to overlapMatrix for undetected/detected peaks at default values
197 |       pred_pos_def <- which(hitsMatrix[, x] >= default_thr)
198 |       pred_neg_def <- which(hitsMatrix[, x] < default_thr)
199 |       overlapMatrix[pred_pos_def, x] <- 1
200 |       overlapMatrix[pred_neg_def, x] <- 0
201 |       
202 |       positive <- hitsMatrix[which(hitsMatrix[,"Reference_set"] == 1), x]
203 |       negative <- hitsMatrix[which(hitsMatrix[,"Reference_set"] == 0), x]
204 |       #par(mfrow = c(2, 1))
205 |       #pdf(file = paste0(x,"_scores_distribution.pdf"))
206 |       #hist(positive, main = paste0(x, " - Scores for positive peaks"))
207 |       #hist(negative, main = paste0(x, " - Scores for negative peaks"))
208 |       #dev.off()
209 |       if (length(negative) > 0) {
210 |         pr <- pr.curve(scores.class0 = unname(positive), scores.class1 = unname(negative), curve=T, rand.compute=TRUE)
211 |         save(pr, file = paste0(resultsFolder, "/", x,"_PRcurve", notes, "_window_", w, "bp.Rdata"))
212 |         pdf(file = paste0(resultsFolder, "/", x,"_PRcurve", notes, "_window_", w, "bp.pdf"), width = 8, height = 8)
213 |         plot(pr, main = paste0(x, " Precision-Recall curve"), rand.plot=TRUE)
214 |         dev.off()
215 |         listPRcurves[[x]] <- pr
216 |       } else {
217 |         cat(sprintf("All genome bins include peaks, skipping PR curve plotting for file %s\n", x))
218 |       }
219 |       # Plot "manual" PR curve
220 |       thresholds <- c(seq(from = min(score), to = max(score), length.out = 100), default_thr)
221 |       recall <- seq(from = 0, to = 0, length.out = 101)
222 |       precision <- seq(from = 0, to = 0, length.out = 101)
223 |       F1_score <- seq(from = 0, to = 0, length.out = 101)
224 |       for (t in 1:length(thresholds)) {
225 |         thr <- thresholds[t]
226 |         TP <- length(which(positive >= thr))
227 |         FN <- length(which(positive < thr))
228 |         FP <- length(which(negative >= thr))
229 |         TN <- length(which(negative < thr))
230 |         recall[t] <- TP/(TP + FN)
231 |         precision[t] <- TP/(TP + FP)
232 |         F1_score[t] <- 2*recall[t]*precision[t]/(recall[t] + precision[t])
233 |       }
234 |       Performances[[x]] <- data.frame(tool = x, threshold = thresholds, recall = recall, precision = precision, F1_score = F1_score)      
235 |       #pdf(paste0(x, "_PRcurve_manual.pdf"))
236 |       #plot(recall, precision, main = paste0(x, " - PR manual"), type = "l", xlim = c(0, 1), ylim = c(0, 1))
237 |       #dev.off()
238 |     } 
239 |     else if (grepl(x, pattern = "MINES")) {
240 |       hitsMatrix[overlap[, 2], x] <- 1
241 |       overlapMatrix[overlap[, 2], x] <- 1
242 |       TP <- 0
243 |       for (y in 1:nrow(hitsMatrix)){
244 |         if ((hitsMatrix[y, "Reference_set"] == 1) && (hitsMatrix[y, x] == 1)){
245 |           TP <- TP + 1
246 |         }
247 |       }
248 |       # Recall + Precision
249 |       totPositiveGS <- length(which(hitsMatrix[, "Reference_set"] == 1))
250 |       totPositiveTool <- length(which(hitsMatrix[, x] == 1))
251 |       recall <- TP/totPositiveGS
252 |       precision <- TP/totPositiveTool
253 |       # F1 score
254 |       F1_score <- 2*(precision*recall)/(precision+recall)
255 |       Performances[[x]]	<- data.frame(tool = x, threshold = thresholds, recall = recall, precision = precision, F1_score = F1_score)
256 |     } 
257 |     else if (grepl(x, pattern = "Nanom6A")) {
258 |       hitsMatrix[overlap[, 2], x] <- 1
259 |       matrix_nanom6A <- cbind(matrix_nanom6A, hitsMatrix[, x])
260 |       names_nanom6A <- c(names_nanom6A, x)
261 |       if (grepl(x, pattern = "0\\.5")){
262 |         overlapMatrix[overlap[, 2], x] <- 1
263 |         TP <- 0
264 |         for (y in 1:nrow(hitsMatrix)){
265 |           if ((hitsMatrix[y, "Reference_set"] == 1) && (hitsMatrix[y, x] == 1)){
266 |             TP <- TP + 1
267 |           }
268 |         }
269 |         # Recall + Precision
270 |         totPositiveGS <- length(which(hitsMatrix[, "Reference_set"] == 1))
271 |         totPositiveTool <- length(which(hitsMatrix[, x] == 1))
272 |         recall <- TP/totPositiveGS
273 |         precision <- TP/totPositiveTool
274 |         # F1 score
275 |         F1_score <- 2*(precision*recall)/(precision+recall)
276 |         Performances[[x]] <- data.frame(tool = x, threshold = "0.5", recall = recall, precision = precision, F1_score = F1_score)
277 |       }
278 |     }
279 |   }
280 |   
281 |   if (length(grep(x = tools, pattern = "Nanom6A")) > 0) {
282 |     ### Code for PR curve nanom6A which has multiple files each run with a different threshold
283 |     names_nanom6A <- str_extract(names_nanom6A, "[0-9]\\.[0-9]*")
284 |     colnames(matrix_nanom6A) <- c("RS", names_nanom6A)
285 |     
286 |     max_thr <- c()
287 |     short <- matrix_nanom6A[,2:ncol(matrix_nanom6A)]
288 |     if (length(which(short == 0)) > 0) {
289 |       max_thr <- vector(length = nrow(short), mode = "numeric")
290 |       names(max_thr) <- rownames(short)
291 |       
292 |       short_nozero <- short[names(which(apply(short, 1, function(x) any(as.logical(x)) != 0 ))), ]
293 |       tmp <- apply(short_nozero, 1, function(x) {names(which(x == 1))[length(which(x == 1))]})
294 |       max_thr[names(tmp)] <- as.numeric(tmp)
295 |       max_thr <- unname(max_thr)
296 |       
297 |       new_matrix <- cbind(matrix_nanom6A, max_thr)
298 |       posit <- new_matrix[which(new_matrix[, "RS"] == 1), "max_thr"]
299 |       negat <- new_matrix[which(new_matrix[, "RS"] == 0), "max_thr"]
300 |       
301 |       pr <- pr.curve(posit, negat, curve = T, rand.compute=TRUE)
302 |       save(pr, file = paste0(resultsFolder, "/Nanom6A_PRcurve", notes, "_window_", w, "bp.Rdata"))
303 |       pdf(file = paste0(resultsFolder, "/Nanom6A_PRcurve", notes, "_window_", w, "bp.pdf"), width = 8, height = 8)
304 |       plot(pr, main = "Nanom6A Precision-Recall curve", rand.plot=TRUE)
305 |       dev.off()
306 |       listPRcurves[["Nanom6A"]] <- pr      
307 |       #par(mfrow = c(2, 1))
308 |       #pdf(file = paste0(resultsFolder, "nanom6A_scores_distribution.pdf"))
309 |       #hist(posit, 100,main = "nanom6A - Scores for positive peaks")
310 |       #hist(negat, 100,main = "nanom6A - Scores for negative peaks")
311 |       #dev.off()
312 |     } else {
313 |       cat("All genome bins include peaks, skipping PR curve plotting for tool nanom6a\n")
314 |     }
315 |   }
316 |   
317 |   sink(paste0(resultsFolder, "/Performances", notes, "_window_", w, "bp.tsv"))
318 |   print(Performances)
319 |   sink()
320 |   
321 |   if (length(negative) > 0) {
322 |     ### Plot all the Precision-Recall curves together
323 |     col <- c(7,8,420,153,31,100,33,47,53,62,400,454,28,10)
324 |     
325 |     pdf(file = paste0(resultsFolder, "/Summary_PR_curves", notes, "_window_", w, "bp.pdf"), width = 8, height = 8)
326 |     for (x in 1:length(listPRcurves)) {
327 |       if (x == 1){
328 |         plot(listPRcurves[[x]], color = colors()[col[x]], main = "Summary PR curves", rand.plot=TRUE)
329 |       }
330 |       else{
331 |         plot(listPRcurves[[x]], add = T, color = colors()[col[x]], main = "Summary PR curves")
332 |       }
333 |     }
334 |     legend("bottomright", legend = names(listPRcurves), col = colors()[col[1:length(listPRcurves)]], lty=1:1, cex=0.8, bg = "lightblue" )
335 |     dev.off()
336 |   } else {
337 |     cat("All genome bins include peaks, skipping summary PR curve plotting\n")
338 |   }
339 |   
340 |   ind_nanom6A <- grep(pattern = "Nanom6A", x = colnames(overlapMatrix))
341 |   if (length(ind_nanom6A) > 0) {
342 |     colnames(overlapMatrix)[ind_nanom6A] <- gsub(x = colnames(overlapMatrix)[ind_nanom6A], pattern = "_ratio.*", replacement = "")
343 |   }
344 |   colnames(overlapMatrix) <- gsub(pattern = "_output.*", replacement = "", x = colnames(overlapMatrix))
345 |   
346 |   data_ovlp <- matrix(data = 0, nrow = dim(overlapMatrix)[2] - 1, ncol = dim(overlapMatrix)[2] - 1)
347 |   tools <- setdiff(colnames(overlapMatrix), "Reference_set")
348 | 
349 |   colnames(data_ovlp) <- tools
350 |   rownames(data_ovlp) <- tools
351 |   
352 |   for (i in tools) {
353 |     for (j in tools) {
354 |       data_ovlp[i, j] <- length(intersect(which(overlapMatrix[, i] == 1), which(overlapMatrix[, j] == 1)))/length(which(overlapMatrix[, i] == 1))
355 |     }
356 |   }
357 |   pdf(paste0(resultsFolder, "/Tools_overlap_default_par", notes, "_window_", w, "bp.pdf"))
358 |   pheatmap(data_ovlp, cluster_rows = FALSE, cluster_cols = FALSE, show_rownames = TRUE, show_colnames = TRUE, fontsize = 15, display_numbers = FALSE, color = colorRampPalette(c("white", "red"))(30))
359 |   dev.off()
360 |   return(list(hitsMatrix, overlapMatrix, Performances, data_ovlp))
361 | }
362 | 
363 | results <- Run_statistical_analysis(genesBins_par = genesBins, peaks_par = peaks_granges, files_par = files, notes = "", w = w)  # returns list(hitsMatrix, overlapMatrix, Performances, data_ovlp)
364 | hitsMatrix <- results[[1]]
365 | overlapMatrix <- results[[2]]
366 | Performances <- results[[3]]
367 | data_ovlp <- results[[4]]
368 | save(results, file = paste0(resultsFolder, "/Results_window_", w, "bp.rda"))
369 | # Same analysis on the RRACH inputs (genesBins_RRACH / peaks_RRACH_granges); output files are tagged "_RRACH".
370 | results_RRACH <- Run_statistical_analysis(genesBins_par = genesBins_RRACH, peaks_par = peaks_RRACH_granges, files_par = files, notes = "_RRACH", w = w)
371 | hitsMatrix_RRACH <- results_RRACH[[1]]
372 | overlapMatrix_RRACH <- results_RRACH[[2]]  # own variable: assigning to overlapMatrix here would clobber the result of the first run
373 | Performances_RRACH <- results_RRACH[[3]]
374 | data_ovlp_RRACH <- results_RRACH[[4]]
375 | save(results_RRACH, file = paste0(resultsFolder, "/Results_window_", w, "bp_RRACH.rda"))
376 | # The *_highcov analyses run only when highcov_GRanges is non-NULL.
377 | if (!is.null(highcov_GRanges)) {
378 |   results_highcov <- Run_statistical_analysis(genesBins_par = genesBins_highcov, peaks_par = peaks_highcov_granges, files_par = files, notes = "_highcov", w = w)
379 |   hitsMatrix_highcov <- results_highcov[[1]]
380 |   overlapMatrix_highcov <- results_highcov[[2]]
381 |   Performances_highcov <- results_highcov[[3]]
382 |   data_ovlp_highcov <- results_highcov[[4]]
383 |   save(results_highcov, file = paste0(resultsFolder, "/Results_window_", w, "bp_highcov.rda"))
384 |   # Same call on the *_highcov_RRACH inputs; output files are tagged "_highcov_RRACH".
385 |   results_highcov_RRACH <- Run_statistical_analysis(genesBins_par = genesBins_highcov_RRACH, peaks_par = peaks_highcov_RRACH_granges, files_par = files, notes = "_highcov_RRACH", w = w)
386 |   hitsMatrix_highcov_RRACH <- results_highcov_RRACH[[1]]
387 |   overlapMatrix_highcov_RRACH <- results_highcov_RRACH[[2]]
388 |   Performances_highcov_RRACH <- results_highcov_RRACH[[3]]
389 |   data_ovlp_highcov_RRACH <- results_highcov_RRACH[[4]]
390 |   save(results_highcov_RRACH, file = paste0(resultsFolder, "/Results_window_", w, "bp_highcov_RRACH.rda"))
391 | }
392 | 
393 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/Scripts/postprocessing.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | ### load input variables ###
  3 | # Every trailing argument must look like name=value; each one becomes a variable holding the value as a string.
  4 | args = commandArgs(trailingOnly=TRUE)
  5 | for(v in args)
  6 | {
  7 |   vTmp <- regmatches(v, regexpr("=", v, fixed = TRUE), invert = TRUE)[[1]] # split at the first "=" only, so a value may itself contain "="
  8 |   if (length(vTmp) == 2 && nzchar(vTmp[[1]])) assign(vTmp[[1]], vTmp[[2]]) else stop("Malformed argument (expected name=value): ", v)
  9 | }
 10 | 
 11 | library(IRanges)
 12 | library(ensembldb)
 13 | library(GenomicRanges)
 14 | library(stringr)
 15 | library(Biostrings)
 16 | library(parallel)
 17 | 
 18 | output_processing <- function(tool, path_folder, output_file, filtering_parameter, genome_gtf, genome_bed){
 19 |   if (!file.exists(path_folder)){
 20 |     message(paste0(tool,"'s output folder doesn't exist."))
 21 |   }
 22 |   else {
 23 |     message(paste0("Processing output file from tool: ", tool,"\n"))
 24 |     switch (tool,
 25 |             differr = {output_differr <- function(){
 26 |               # Convert differr's BED output (differrOut.bed) into the common tab-separated site table.
 27 |               bed_differr <- paste0(path_folder, "/", "differrOut.bed")
 28 |               if (!file.exists(bed_differr) || file.info(bed_differr)$size == 0){
 29 |                 message(paste0(tool,"'s output files don't exist."))
 30 |               }
 31 |               else {
 32 |                 data_differr <- read.table(bed_differr)
 33 |                 # Start is BED column 2 plus one, End is BED column 3 unchanged, every row is labelled "Mod".
 34 |                 # Column 5 is turned into 10^(-x): Parameter of filtering is FDR not -log10(FDR)
 35 |                 differr <- data.frame(Chr = data_differr[[1]], Start = data_differr[[2]] + 1, End = data_differr[[3]], Strand = data_differr[[6]], Status = rep("Mod", nrow(data_differr)), FDR = 10**(-data_differr$V5))
 36 |                 write.table(differr, file = output_file, quote = F, sep = "\t", row.names = F)
 37 |               }
 38 |             }
 39 |             # Only build the table when output_file has not been written yet.
 40 |             if (!file.exists(output_file)) {output_differr()}
 41 |             },
 42 |             dena = {output_dena <- function(){if (file.exists(paste0(path_folder, "/", "dena_label.tsv"))
 43 |                                                   && file.info(paste0(path_folder, "/", "dena_label.tsv"))$size != 0){
 44 |               data_dena <- read.table(paste0(path_folder, "/", "dena_label.tsv"))
 45 |               # Keep V1 (used as transcript name), V2 twice (Start/End of the site) and V6 twice (Status and Mod.Ratio).
 46 |               dena <- data_dena[, c(1,2,2,6,6)]
 47 |               dena$V6 <- ifelse(!is.nan(dena$V6) & !is.na(dena$V6) & dena$V6 > filtering_parameter, "Mod", "Unmod")
 48 |               dena <- dena[which(dena$V6 == "Mod"), ]
 49 |               # Header-only table, written when no site is left after filtering or after the lift-over.
 50 |               write_empty_dena <- function(){write.table(data.frame(Chr = character(0), Start = integer(0), End = integer(0), Strand = character(0), Status = character(0), Mod.Ratio = numeric(0)), file = output_file, quote = F, sep = "\t", row.names = F)}
 51 |               if (nrow(dena) == 0) {write_empty_dena(); return(invisible(NULL))}
 52 |               # Creation Edb Database from genome GTF
 53 |               EnsDb <- suppressWarnings(suppressMessages(ensDbFromGtf(gtf = genome_gtf)))
 54 |               edb <- EnsDb(EnsDb)
 55 |               # Lift-over + output bed file
 56 |               test_dena <- IRanges(start = dena[,2], end = dena[,3], names = c(dena[,1]))
 57 |               num_rows_chunk <- 1
 58 |               mc.cores <- as.numeric(mccores)
 59 |               if (length(test_dena) < num_rows_chunk) {
 60 |                 test_dena_split <- list("1" = test_dena) # named like the split() chunks so it can be retried by name
 61 |               } else {
 62 |                 test_dena_split <- split(test_dena, rep(seq(from = 1, to = ceiling(length(test_dena)/num_rows_chunk)), each = num_rows_chunk)[1:length(test_dena)])
 63 |               }
 64 |               # Chunks whose lift-over raised a warning or an error come back as NULL and are retried, but at most
 65 |               # max_attempts times: a chunk that fails every time would otherwise keep this loop running forever.
 66 |               tmp <- vector(mode = "list", length = length(test_dena_split))
 67 |               names(tmp) <- 1:length(test_dena_split)
 68 |               ind_retry <- names(tmp)
 69 |               max_attempts <- 10
 70 |               attempt <- 0
 71 |               while(length(ind_retry) > 0 && attempt < max_attempts) {
 72 |                 attempt <- attempt + 1
 73 |                 cat(sprintf("Starting new iteration for dena; %d sites missing\n", length(ind_retry)))
 74 |                 tmp1 <- mclapply(test_dena_split[ind_retry], function(x) {
 75 |                   tryCatch({
 76 |                     coordinate_dena_unlisted <- unlist(transcriptToGenome(x, edb))
 77 |                     return(coordinate_dena_unlisted)
 78 |                   }, warning = function(w) {
 79 |                     print("Warning")
 80 |                     return(NULL)
 81 |                   }, error = function(e) {
 82 |                     print("Error")
 83 |                     return(NULL)
 84 |                   }
 85 |                   )}, mc.cores = mc.cores)
 86 |                 ind_ok <- names(tmp1)[!unlist(lapply(tmp1, is.null))]
 87 |                 tmp[ind_ok] <- tmp1[ind_ok]
 88 |                 ind_retry <- setdiff(ind_retry, ind_ok)
 89 |               }
 90 |               tmp <- tmp[!unlist(lapply(tmp, is.null))] # keep only the chunks that were lifted over
 91 |               if (length(ind_retry) > 0) {message(sprintf("%d dena sites could not be lifted over after %d attempts and were dropped.", length(ind_retry), max_attempts))}
 92 |               if (length(tmp) == 0) {write_empty_dena(); return(invisible(NULL))}
 93 |               coordinate_dena_unlisted <- unlist(as(tmp, "GRangesList"))
 94 |               df_dena <- as.data.frame(unname(coordinate_dena_unlisted[,c(0,2,4,5)]))[,c(1:3,5,6,7,8)]
 95 |               tmp_rownames <- paste0(df_dena[, 6], "_", df_dena[, 7], "_", df_dena[, 5]) # same key layout as rownames(dena) below
 96 |               dup_names <- names(which(table(tmp_rownames) > 1))
 97 |               ind_dup <- which(tmp_rownames %in% dup_names)
 98 |               ind_dup_rm <- ind_dup[which(duplicated(df_dena[ind_dup, c(5,6,7)]))]
 99 |               if (length(ind_dup_rm) > 0) {
100 |                 df_dena <- df_dena[-ind_dup_rm, ]
101 |               }
102 |               rownames(df_dena) <- paste0(df_dena[, 6], "_", df_dena[, 7], "_", df_dena[, 5])
103 |               rownames(dena) <- paste0(dena[, 2], "_", dena[, 3], "_", dena[, 1])
104 | 
105 |               df_dena$Status <- dena[rownames(df_dena), 4]
106 |               df_dena$Mod.Ratio <- dena[rownames(df_dena), 5]
107 |               df_dena_final <- df_dena[,c(1,2,3,4,8,9)]
108 |               colnames(df_dena_final) <- c("Chr", "Start", "End", "Strand", "Status", "Mod.Ratio")
109 |               write.table(df_dena_final, file = output_file, quote = F, sep = "\t", row.names = F)
110 |             }
111 |               else {message(paste0(tool,"'s output files don't exist."))}
112 |             }
113 |             if (!file.exists(output_file)) {
114 |               output_dena()
115 |             }
116 |             },
117 |             drummer = {output_drummer <- function(){
118 |               # Merge every non-empty "multiple_comp.txt" table found (recursively) under path_folder and
119 |               # write each reported position as a "Mod" site with strand "*".
120 |               all_data_drummer <- list.files(path = path_folder, pattern = "multiple_comp.txt", recursive = T)
121 |               data_drummer <- data.frame()
122 |               for (file_drummer in all_data_drummer) {
123 |                 if (file.info(paste0(path_folder, "/", file_drummer))$size != 0){
124 |                   data_drummer <- rbind(data_drummer, read.table(paste0(path_folder, "/", file_drummer), header = TRUE))
125 |                 }
126 |               }
127 |               # No table was read (no file matched, or every file was empty): report it instead of failing on the column selection.
128 |               if (ncol(data_drummer) == 0){
129 |                 message(paste0(tool,"'s output files don't exist."))
130 |               } else {
131 |                 #drummer <- data_drummer[, c(1,2,2,5,5)] #ok if -m is not set to TRUE in DRUMMER command
132 |                 #drummer <- data_drummer[, c(1,2,2,7,7)]
133 |                 drummer <- data_drummer[, c("transcript_id", "position", "position", "max.G_padj", "max.G_padj")]
134 |                 drummer$max.G_padj <- rep("Mod", length(drummer$max.G_padj))
135 |                 drummer$strand <- rep("*", nrow(drummer))
136 |                 drummer <- drummer[,c(1,2,3,6,4,5)]
137 |                 colnames(drummer) <- c("Chr", "Start", "End", "Strand", "Status", "Padj")
138 |                 write.table(drummer, file = output_file, quote = F, sep = "\t", row.names = F)
139 |               }
140 |             }
141 |             if (!file.exists(output_file)) {output_drummer()}
142 |             },
143 |             yanocomp = {output_yanocomp <- function(){
144 |               # Convert Yanocomp's BED output (yanocomp_output.bed) into the common tab-separated site table.
145 |               bed_yanocomp <- paste0(path_folder, "/", "yanocomp_output.bed")
146 |               if (!file.exists(bed_yanocomp) || file.info(bed_yanocomp)$size == 0){
147 |                 message(paste0(tool,"'s output files don't exist."))
148 |               }
149 |               else {
150 |                 data_yanocomp <- read.table(bed_yanocomp)
151 |                 # Start and End are both BED column 2 plus two; every row is "Mod"; BED column 9 is the score.
152 |                 site_yanocomp <- data_yanocomp[[2]] + 2
153 |                 yanocomp <- data.frame(Chr = data_yanocomp[[1]], Start = site_yanocomp, End = site_yanocomp, Strand = data_yanocomp[[6]], Status = rep("Mod", nrow(data_yanocomp)), Score = data_yanocomp[[9]])
154 |                 write.table(yanocomp, file = output_file, quote = F, sep = "\t", row.names = F)
155 |               }
156 |             }
157 |             if (!file.exists(output_file)) {output_yanocomp()}
158 |             },
159 |             nanocompore = {output_nanocompore <- function(){
160 |               # Keep the Nanocompore rows whose GMM logit p-value is below filtering_parameter and whose |Logit_LOR| exceeds 0.5.
161 |               tsv_nanocompore <- paste0(path_folder, "/", "outnanocompore_results.tsv")
162 |               if (!file.exists(tsv_nanocompore) || file.info(tsv_nanocompore)$size == 0){
163 |                 message(paste0(tool,"'s output files don't exist."))
164 |               } else {
165 |                 data_nanocompore <- read.table(tsv_nanocompore, header = TRUE)
166 |                 nanocompore <- data_nanocompore[, c(2,3,3,5,7,13,7)]
167 |                 nanocompore$genomicPos <- nanocompore$genomicPos + 2
168 |                 nanocompore$genomicPos.1 <- nanocompore$genomicPos.1 + 2
169 |                 nanocompore$Logit_LOR <- ifelse(nanocompore$Logit_LOR == "NC", NA, nanocompore$Logit_LOR)
170 |                 pval_nanocompore <- nanocompore$GMM_logit_pvalue
171 |                 lor_nanocompore <- nanocompore$Logit_LOR
172 |                 keep_nanocompore <- !(is.nan(pval_nanocompore) | is.na(pval_nanocompore) | is.nan(lor_nanocompore) | is.na(lor_nanocompore)) & pval_nanocompore < filtering_parameter & abs(as.numeric(lor_nanocompore)) > 0.5
173 |                 nanocompore$GMM_logit_pvalue <- ifelse(keep_nanocompore, "Mod", "Unmod")
174 |                 nanocompore <- nanocompore[which(nanocompore$GMM_logit_pvalue == "Mod"), c(1,2,3,4,5,7)]
175 |                 colnames(nanocompore) <- c("Chr", "Start", "End", "Strand", "Status", "Pvalue")
176 |                 write.table(nanocompore, file = output_file, quote = F, sep = "\t", row.names = F)
177 |               }
178 |             }
179 |             if (!file.exists(output_file)) {output_nanocompore()}
180 |             },
181 |             eligos = {output_eligos <- function(){if (file.exists(paste0(path_folder, "/", "minimap.sortG.1_vs_minimap.sortG.2_on_genome_combine.txt"))
182 |                                                       && file.info(paste0(path_folder, "/", "minimap.sortG.1_vs_minimap.sortG.2_on_genome_combine.txt"))$size != 0){
183 |               data_eligos <- read.table(paste0(path_folder, "/", "minimap.sortG.1_vs_minimap.sortG.2_on_genome_combine.txt"), header = TRUE, fill = TRUE)
184 |               # Rows holding an NA in any column are dropped; the row filter and the column selection are applied together so the filter is not lost.
185 |               rows_eligos <- apply(data_eligos, 1, function(x){all(!is.na(x))})
186 |               eligos <- data_eligos[which(rows_eligos), c(1:4,18,16,18)]
187 |               eligos$start_loc <- eligos$start_loc + 1
188 |               # A site is "Mod" when adjPval is below filtering_parameter and oddR is above 1.2 (both must be valid numbers).
189 |               eligos$adjPval <- ifelse(!is.nan(eligos$adjPval) & !is.na(eligos$adjPval) & 
190 |                                          !is.nan(eligos$oddR) & !is.na(eligos$oddR) &
191 |                                          eligos$adjPval < filtering_parameter & eligos$oddR > 1.2, "Mod", "Unmod")
192 |               eligos <- eligos[which(eligos$adjPval == "Mod"), ]
193 |               eligos <- eligos[,c(1,2,3,4,5,7)]
194 |               colnames(eligos) <- c("Chr", "Start", "End", "Strand", "Status", "Padj")
195 |               write.table(eligos, file = output_file, quote = F, sep = "\t", row.names = F)
196 |             }
197 |               else {message(paste0(tool,"'s output files don't exist."))}
198 |             }
199 |             if (!file.exists(output_file)) {
200 |               output_eligos()
201 |             }
202 |             },
203 |             mines = {output_mines <- function(){if (file.exists(paste0(path_folder, "/", "m6A_output_filename.bed"))
204 |                                                     && file.info(paste0(path_folder, "/", "m6A_output_filename.bed"))$size != 0){
205 |               data_mines <- read.table(paste0(path_folder, "/", "m6A_output_filename.bed")) 
206 |               # Keep V1 (used as transcript name), V2 + 1 and V3 as Start/End, and V7, which is overwritten with the "Mod" status.
207 |               mines <- data_mines[, c(1:3,7,7)]
208 |               mines$V2 <- mines$V2 + 1
209 |               mines$V7 <- rep("Mod", length(mines$V7))
210 |               # Creation Edb Database from genome GTF
211 |               EnsDb <- suppressWarnings(suppressMessages(ensDbFromGtf(gtf = genome_gtf)))
212 |               edb <- EnsDb(EnsDb)
213 |               # Lift-over + output bed
214 |               test_mines <- IRanges(start = mines[,2], end = mines[,3], names = c(mines[,1]))
215 | 
216 |               num_rows_chunk <- 1
217 |               mc.cores <- as.numeric(mccores)
218 |               if (length(test_mines) < num_rows_chunk) {
219 |                 test_mines_split <- list("1" = test_mines) # named like the split() chunks so it can be retried by name
220 |               } else {
221 |                 test_mines_split <- split(test_mines, rep(seq(from = 1, to = ceiling(length(test_mines)/num_rows_chunk)), each = num_rows_chunk)[1:length(test_mines)])
222 |               }
223 |               # Chunks whose lift-over raised a warning or an error come back as NULL and are retried, but at most
224 |               # max_attempts times: a chunk that fails every time would otherwise keep this loop running forever.
225 |               tmp <- vector(mode = "list", length = length(test_mines_split))
226 |               names(tmp) <- 1:length(test_mines_split)
227 |               ind_retry <- names(tmp)
228 |               max_attempts <- 10
229 |               attempt <- 0
230 |               while(length(ind_retry) > 0 && attempt < max_attempts) {
231 |                 attempt <- attempt + 1
232 |                 cat(sprintf("Starting new iteration for mines; %d sites missing\n", length(ind_retry)))
233 |                 tmp1 <- mclapply(test_mines_split[ind_retry], function(x) {
234 |                   tryCatch({
235 |                     coordinate_mines_unlisted <- unlist(transcriptToGenome(x, edb))
236 |                     return(coordinate_mines_unlisted)
237 |                   }, warning = function(w) {
238 |                     print("Warning")
239 |                     return(NULL)
240 |                   }, error = function(e) {
241 |                     print("Error")
242 |                     return(NULL)
243 |                   }
244 |                   )}, mc.cores = mc.cores)
245 |                 ind_ok <- names(tmp1)[!unlist(lapply(tmp1, is.null))]
246 |                 tmp[ind_ok] <- tmp1[ind_ok]
247 |                 ind_retry <- setdiff(ind_retry, ind_ok)
248 |               }
249 |               tmp <- tmp[!unlist(lapply(tmp, is.null))] # keep only the chunks that were lifted over
250 |               if (length(ind_retry) > 0) {message(sprintf("%d mines sites could not be lifted over after %d attempts and were dropped.", length(ind_retry), max_attempts))}
251 |               # Nothing could be lifted over: write a header-only table and stop here.
252 |               if (length(tmp) == 0) {write.table(data.frame(Chr = character(0), Start = integer(0), End = integer(0), Strand = character(0), Status = character(0)), file = output_file, quote = F, sep = "\t", row.names = F); return(invisible(NULL))}
253 | 
254 |               coordinate_mines_unlisted <- unlist(as(tmp, "GRangesList"))
255 |               df_mines <- as.data.frame(unname(coordinate_mines_unlisted[,c(0,2,4,5)]))[,c(1:3,5,6,7,8)]
256 | 
257 |               tmp_rownames <- paste0(df_mines[, 6], "_", df_mines[, 7], "_", df_mines[, 5]) # same key layout as rownames(mines) below
258 |               dup_names <- names(which(table(tmp_rownames) > 1))
259 |               ind_dup <- which(tmp_rownames %in% dup_names)
260 |               ind_dup_rm <- ind_dup[which(duplicated(df_mines[ind_dup, c(5,6,7)]))]
261 |               if (length(ind_dup_rm) > 0) {
262 |                 df_mines <- df_mines[-ind_dup_rm, ]
263 |               }
264 |               rownames(df_mines) <- paste0(df_mines[, 6], "_", df_mines[, 7], "_", df_mines[, 5])
265 |               rownames(mines) <- paste0(mines[, 2], "_", mines[, 3], "_", mines[, 1])
266 |               df_mines$Status <- mines[rownames(df_mines), 4]
267 |               df_mines_final <- df_mines[,c(1,2,3,4,8)]
268 |               colnames(df_mines_final) <- c("Chr", "Start", "End", "Strand", "Status")
269 |               write.table(df_mines_final, file = output_file, quote = F, sep = "\t", row.names = F)
270 |             }
271 |               else {message(paste0(tool,"'s output files don't exist."))}
272 |             }
273 |             if (!file.exists(output_file)) {
274 |               output_mines()
275 |             }
276 |             },
277 |             epinanoErr = {output_epinano_error <- function(){
278 |               # Read the per-strand prediction tables (plus first, then minus) and keep the rows predicted as "mod".
279 |               # A strand is used only when its file exists and is not empty, the same existence + size test the
280 |               # single-file tools use, so an empty CSV no longer makes read.table() fail.
281 |               files_epinanoerr <- paste0(path_folder, c("/plus/", "/minus/"), "diffErr.delta-sum_err.prediction.csv")
282 |               files_epinanoerr <- files_epinanoerr[file.exists(files_epinanoerr)]
283 |               files_epinanoerr <- files_epinanoerr[file.info(files_epinanoerr)$size != 0]
284 |               if (length(files_epinanoerr) != 0){
285 |               data_epinanoerr <- do.call(rbind, lapply(files_epinanoerr, read.table, header = TRUE, sep = ","))
286 |               # chr_pos is split on single spaces once: field 1 -> Chr, field 2 -> Start and End, field 4 -> Strand.
287 |               fields_epinanoerr <- strsplit(as.character(data_epinanoerr$chr_pos), split = " ")
288 |               position_epinanoerr <- as.numeric(vapply(fields_epinanoerr, function(x){return(x[2])}, character(1)))
289 | 
290 |               epinanoerr <- data.frame("Chr" = vapply(fields_epinanoerr, function(x){return(x[1])}, character(1)),
291 |                                        "Start" = position_epinanoerr,
292 |                                        "End" = position_epinanoerr,
293 |                                        "Strand" = vapply(fields_epinanoerr, function(x){return(x[4])}, character(1)),
294 |                                        "Status" = data_epinanoerr$z_score_prediction,
295 |                                        "Delta sum err" = data_epinanoerr$delta_sum_err
296 |               )
297 |               epinanoerr <- epinanoerr[which(epinanoerr$Status == "mod"), ]
298 |               write.table(epinanoerr, file = output_file, quote = F, sep = "\t", row.names = F)
299 |             }
300 |               else {message(paste0(tool,"'s output files don't exist."))}
301 |             }
302 |             if (!file.exists(output_file)) {
303 |               output_epinano_error()
304 |             }
305 |             },
306 |             epinanoSvm = {output_epinano_svm <- function(){if (file.exists(paste0(path_folder, "/", "plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv")) 
307 |                                                                || file.exists(paste0(path_folder, "/", "minus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"))){
308 |               if (file.exists(paste0(path_folder, "/", "plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv")) 
309 |                   && file.exists(paste0(path_folder, "/", "minus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"))){
310 |                 data_epinanosvm_plus <- read.table(paste0(path_folder, "/", "plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"), sep=",") 
311 |                 data_epinanosvm_minus <- read.table(paste0(path_folder, "/", "minus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"), sep=",")
312 |                 data_epinanosvm <- rbind(data_epinanosvm_plus, data_epinanosvm_minus)
313 |               }
314 |               else if (file.exists(paste0(path_folder, "/", "plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"))){
315 |                 data_epinanosvm <- read.table(paste0(path_folder, "/", "plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"), sep = ",")
316 |               }
317 |               else {data_epinanosvm <- read.table(paste0(path_folder, "/", "minus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv"), sep = ",")}
318 |               
319 |               epinanosvm <- data.frame("Kmer" = data_epinanosvm$V1,
320 |                                        "Chr" = data_epinanosvm$V3,
321 |                                        "Start" = sapply(data_epinanosvm$V2, function(x){return(strsplit(x, split = "\\-")[[1]][1])}),
322 |                                        "End" = sapply(data_epinanosvm$V2, function(x){return(strsplit(x, split = "\\-")[[1]][1])}),
323 |                                        "Strand" = data_epinanosvm$V4,
324 |                                        "Status" = ifelse(data_epinanosvm$V28 > filtering_parameter, "Mod", "Unmod"), 
325 |                                        "ProbM" = data_epinanosvm$V28
326 |               )
327 |               epinanosvm$Start <- as.numeric(epinanosvm$Start) + 2
328 |               epinanosvm$End <- as.numeric(epinanosvm$End) + 2
329 |               epinanosvm <- epinanosvm[which(epinanosvm$Kmer %in% rrach), ]
330 |               epinanosvm <- epinanosvm[which(epinanosvm$Status == "Mod"), ]
331 |               epinanosvm <- epinanosvm[, 2:7]
332 |               write.table(epinanosvm, file = output_file, quote = F, sep = "\t", row.names = F)
333 |             }
334 |               else {message(paste0(tool,"'s output files don't exist."))}
335 |             }
336 |             if (!file.exists(output_file)) {
337 |               output_epinano_svm()
338 |             }
339 |             },
340 |             xpore = {output_xpore <- function(){if (file.exists(paste0(path_folder, "/", "diffmod.table"))
341 |                                                     && file.info(paste0(path_folder, "/", "diffmod.table"))$size != 0){
342 |               data_xpore <- read.table(paste0(path_folder, "/", "diffmod.table"), header = TRUE, sep=",") 
343 |               xpore <- data.frame("GeneID" = data_xpore[,1],
344 |                                   "Start" = data_xpore[,2] + 2,
345 |                                   "End" = data_xpore[,2] + 2,
346 |                                   "Status" = p.adjust(data_xpore[,5], method = "BH"), # Adviced to use FDR instead of pvalue
347 |                                   "FDR" = p.adjust(data_xpore[,5], method = "BH")
348 |               )
349 |               xpore$Status <- ifelse(!is.nan(xpore$Status) & !is.na(xpore$Status) & xpore$Status < filtering_parameter, "Mod", "Unmod")
350 |               xpore <- xpore[which(xpore$Status == "Mod"), ]
351 |               # Add strand and chromosome comparing gene ID with genome bed file
352 |               genome <- read.table(genome_bed, header = FALSE, sep="\t")
353 |               rownames(genome) <- genome[,4]
354 |               xpore$Strand <- genome[xpore$GeneID, "V6"]
355 |               xpore$Chr <- genome[xpore$GeneID, "V1"]
356 |               xpore <- xpore[,c(7,2,3,6,4,5)]
357 |               write.table(xpore, file = output_file, quote = F, sep = "\t", row.names = F)
358 |             }
359 |               else {message(paste0(tool,"'s output files don't exist."))}
360 |             }
361 |             if (!file.exists(output_file)) {
362 |               output_xpore()
363 |             }
364 |             },
365 |             nanodoc = {output_nanodoc <- function(){if (length(list.files(path = path_folder, pattern = "*.txt")) != 0){
366 |               txt_files_ls <- list.files(path = path_folder, pattern="*.txt")
367 |               txt_files_df <- lapply(txt_files_ls, function(x) {if (file.info(paste0(path_folder, "/", x))$size != 0){
368 |                 table <- read.table(file = paste0(path_folder, "/", x), sep = "\t")
369 |                 table <- cbind(table, x) 
370 |                 table
371 |               } 
372 |               })
373 |               if (length(txt_files_df) != 0){
374 |                 data_nanodoc <- do.call("rbind", lapply(txt_files_df, as.data.frame))
375 |                 nanodoc <- data_nanodoc[,c(1,1,12,12,13)] 
376 |                 nanodoc$V1 <- nanodoc$V1
377 |                 nanodoc$V1.1 <- nanodoc$V1.1
378 |                 nanodoc$V12 <- ifelse(!is.nan(nanodoc$V12) & !is.na(nanodoc$V12) & nanodoc$V12 > filtering_parameter, "Mod", "Unmod")
379 |                 nanodoc <- nanodoc[which(nanodoc$V12 == "Mod"), ]
380 |                 nanodoc$x <- str_extract(nanodoc$x, "(chr)[0-9]+|[IVX]{1,3}")
381 |                 nanodoc$strand <- rep("*", nrow(nanodoc))
382 |                 nanodoc <- nanodoc[,c(5,1,2,6,3,4)]
383 |                 colnames(nanodoc) <- c("Chr", "Start", "End", "Strand", "Status", "Score")
384 |                 if (nrow(nanodoc) > 0) {
385 |                   write.table(nanodoc, file = output_file, quote = F, sep = "\t", row.names = F)
386 |                 }
387 |               }
388 |               else {message(paste0(tool,"'s output files don't exist."))}
389 |             }
390 |               else {message(paste0(tool,"'s output files don't exist."))}
391 |             }
392 |             if (!file.exists(output_file)) {
393 |               output_nanodoc()
394 |             }
395 |             },
396 |             nanom6a = {output_nanom6a <- function(){if (length(list.files(path = path_folder, pattern = "ratio.*.tsv")) != 0){
397 |               files <- list.files(path = path_folder, pattern = "ratio.*.tsv")
398 |               for (file in files){
399 |                 if (file.info(paste0(path_folder, "/", file))$size != 0){
400 |                   data_nanom6a <- read.table(paste0(path_folder, "/", file), sep="\t", fill = T, col.names = 1:100)
401 |                   nanom6a <- data.frame()
402 |                   for (row in 1:nrow(data_nanom6a)) {
403 |                     for (col in 1:length(data_nanom6a[row, ])) {
404 |                       if (col == 1){
405 |                         chr <- strsplit(data_nanom6a[row, col], split = "\\|")[[1]][2]
406 |                         geneID <- strsplit(data_nanom6a[row, col], split = "\\|")[[1]][1]
407 |                       }
408 |                       else {
409 |                         if(data_nanom6a[row, col] != "" && !is.na(data_nanom6a[row, col])){
410 |                           start <- as.numeric(strsplit(data_nanom6a[row, col], split = "\\|")[[1]][1])
411 |                           end <- as.numeric(strsplit(data_nanom6a[row, col], split = "\\|")[[1]][1])
412 |                           mod_ratio <- strsplit(data_nanom6a[row, col], split = "\\|")[[1]][4]
413 |                           x <- c(chr, geneID, start, end, mod_ratio, mod_ratio)
414 |                           nanom6a <- rbind(nanom6a, x)
415 |                         }
416 |                       }
417 |                     }
418 |                   }
419 |                   # Add strand comparing gene ID with genome bed file
420 |                   genome <- read.table(genome_bed, header = FALSE, sep="\t")
421 |                   rownames(genome) <- genome[,4]
422 |                   nanom6a$Strand <- genome[nanom6a[,2], "V6"]
423 |                   nanom6a <- nanom6a[,c(1,3,4,7)]
424 |                   colnames(nanom6a) <- c("Chr", "Start", "End", "Strand")
425 |                   nanom6a$Status <- rep("Mod", nrow(nanom6a))
426 |                   write.table(nanom6a, file = paste0(output_file, "_", file, ".bed"), quote = F, sep = "\t", row.names = F)
427 |                 }
428 |                 else {message(paste0(tool,"'s output files is empty."))}
429 |               }
430 |             }
431 |               else {message(paste0(tool,"'s output files don't exist."))}
432 |             }
433 |             if (!file.exists(output_file)) {
434 |               output_nanom6a()
435 |             }
436 |             },
437 |             tomboComparison = {output_tomboComparison <- function(){if (file.exists(paste0(path_folder, "/", "sample.level_samp_comp_detect.statistic.plus.wig"))
438 |                                                                         && file.info(paste0(path_folder, "/", "sample.level_samp_comp_detect.statistic.plus.wig"))$size != 0){
439 |               data_tombo <- read.table(paste0(path_folder, "/", "sample.level_samp_comp_detect.statistic.plus.wig"), fill = T, header = T)
440 |               #tombo <- data.frame()
441 |               tombo <- matrix(data = NA, nrow = dim(data_tombo)[1], ncol = 5)
442 |               counter <- 1
443 |               for (row in 1:nrow(data_tombo)) {
444 |                 for (col in 1:2) {
445 |                   if (is.na(as.numeric(data_tombo[row,col]))){
446 |                     if (col == 2){
447 |                       transcriptID <- str_extract(data_tombo[row,col], "[A-Z0-9]{6,}(-[A-Z])?(-mRNA)?")
448 |                     }
449 |                   }
450 |                   else if (col == 1){
451 |                     start <- as.numeric(data_tombo[row, col]) 
452 |                     end <- as.numeric(data_tombo[row, col])
453 |                   }
454 |                   else if (col == 2){
455 |                     pvalue <- data_tombo[row, col]
456 |                     x <- c(transcriptID, start, end, pvalue, pvalue)
457 |                     #tombo <- rbind(tombo, x)
458 |                     tombo[counter, ] <- x
459 |                     counter <- counter + 1 
460 |                   }
461 |                 }
462 |               }
463 |               tombo <- as.data.frame(tombo)
464 |               tombo[,4] <- ifelse(!is.nan(tombo[,4]) & !is.na(tombo[,4]) & 10**(-as.numeric(tombo[,4])) < filtering_parameter, "Mod", "Unmod")
465 |               tombo <- tombo[which(tombo[,4] == "Mod"), ]
466 |               # Creation Edb Database from genome GTF
467 |               EnsDb <- suppressWarnings(suppressMessages(ensDbFromGtf(gtf = genome_gtf)))
468 |               edb <- EnsDb(EnsDb)
469 |               # Lift-over + Creation of bed file
470 |               test_tombo <- IRanges(start = as.numeric(tombo[,2]), end = as.numeric(tombo[,3]), names = c(tombo[,1]))
471 |               
472 |               num_rows_chunk <- 1
473 |               mc.cores <- as.numeric(mccores)
474 |               if (length(test_tombo) < num_rows_chunk) {
475 |                 test_tombo_split <- list(test_tombo)
476 |               } else {
477 |                 test_tombo_split <- split(test_tombo, rep(seq(from = 1, to = ceiling(length(test_tombo)/num_rows_chunk)), each = num_rows_chunk)[1:length(test_tombo)])
478 |               }
479 |               
480 |               tmp1 <- vector(mode = "list", length = length(test_tombo_split))
481 |               names(tmp1) <- 1:length(test_tombo_split)
482 |               tmp <- vector(mode = "list", length = length(test_tombo_split))
483 |               names(tmp) <- 1:length(test_tombo_split)
484 |               ind_retry <- 1:length(test_tombo_split)
485 |               while(any(unlist(lapply(tmp, is.null)))) {
486 |                 cat(sprintf("Starting new iteration for Tombo; %d sites missing\n", length(which(unlist(lapply(tmp, is.null))))))
487 |                 tmp1 <- tmp1[ind_retry]
488 |                 tmp1 <- mclapply(test_tombo_split[ind_retry], function(x) {
489 |                   tryCatch({
490 |                     coordinate_tombo_unlisted <- unlist(transcriptToGenome(x, edb))
491 |                     return(coordinate_tombo_unlisted)
492 |                   }, warning = function(w) {
493 |                     print("Warning")
494 |                     return(NULL)
495 |                   }, error = function(e) {
496 |                     print("Error")
497 |                     return(NULL)
498 |                   }
499 |                   )}, mc.cores = mc.cores)
500 |                 
501 |                 ind_retry <- names(which(unlist(lapply(tmp1, function(x) is.null(x)))))
502 |                 ind_ok <- names(which(unlist(lapply(tmp1, function(x) !is.null(x)))))
503 |                 tmp[ind_ok] <- tmp1[ind_ok]
504 |                 if (length(ind_retry) > 0) {
505 |                   tmp1 <- tmp1[ind_retry]
506 |                 }
507 |               }
508 |               coordinate_tombo_unlisted <- unlist(as(tmp, "GRangesList"))
509 |               df_tombo <- as.data.frame(unname(coordinate_tombo_unlisted[,c(0,2,4,5)]))
510 |               df_tombo <- df_tombo[,c(1:3,5,6,7,8)]
511 |               names_df_tombo <- paste0(df_tombo[, 6], "_", df_tombo[, 7], "_", df_tombo[, 5])
512 |               rownames(tombo) <- paste0(tombo[, 2], "_", tombo[, 3], "_", tombo[, 1])
513 |               df_tombo$Status <- tombo[names_df_tombo, 4]
514 |               df_tombo$Pvalue <- 10**(-as.numeric(tombo[names_df_tombo, 5])) # Parameter of filtering is Pvalue not -log10(Pvalue)
515 |               df_tombo_final <- df_tombo[,c(1,2,3,4,8,9)]
516 |               colnames(df_tombo_final) <- c("Chr", "Start", "End", "Strand", "Status", "Pvalue")
517 |               df_tombo_final$Start <- df_tombo_final$Start
518 |               df_tombo_final$End <- df_tombo_final$End + 1
519 |               write.table(df_tombo_final, file = output_file, quote = F, sep = "\t", row.names = F)
520 |             }
521 |               else {message(paste0(tool,"'s output files don't exist."))}
522 |             }
523 |             if (!file.exists(output_file)) {
524 |               output_tomboComparison()
525 |             }
526 |             },
527 |             m6anet = {output_m6anet <- function(){if (file.exists(paste0(path_folder, "/", "data.result.csv"))
528 |                                                       && file.info(paste0(path_folder, "/", "data.result.csv"))$size != 0){
529 |               data_m6anet <- read.table(paste0(path_folder, "/", "data.result.csv"), header = TRUE, sep=",") 
530 |               m6anet <- data.frame("TranscriptID" = data_m6anet[,1],
531 |                                    "Start" = data_m6anet[,2] + 2,
532 |                                    "End" = data_m6anet[,2] + 2,
533 |                                    "Status" = data_m6anet[,4],
534 |                                    "Prob_mod" = data_m6anet[,4]
535 |               )
536 |               m6anet$Status <- ifelse(!is.nan(m6anet$Status) & !is.na(m6anet$Status) & m6anet$Status > filtering_parameter, "Mod", "Unmod")
537 |               m6anet <- m6anet[which(m6anet$Status == "Mod"), ]
538 |               # Creation Edb Database from genome GTF
539 |               EnsDb <- suppressWarnings(suppressMessages(ensDbFromGtf(gtf = genome_gtf)))
540 |               edb <- EnsDb(EnsDb)
541 |               # Lift-over + output bed
542 |               test_m6anet <- IRanges(start = m6anet[,2], end = m6anet[,3], names = c(m6anet[,1]))
543 |               num_rows_chunk <- 1
544 |               mc.cores <- as.numeric(mccores)
545 |               if (length(test_m6anet) < num_rows_chunk) {
546 |                 test_m6anet_split <- list(test_m6anet)
547 |               } else {
548 |                 test_m6anet_split <- split(test_m6anet, rep(seq(from = 1, to = ceiling(length(test_m6anet)/num_rows_chunk)), each = num_rows_chunk)[1:length(test_m6anet)])
549 |               }
550 |               
551 |               tmp1 <- vector(mode = "list", length = length(test_m6anet_split))
552 |               names(tmp1) <- 1:length(test_m6anet_split)
553 |               tmp <- vector(mode = "list", length = length(test_m6anet_split))
554 |               names(tmp) <- 1:length(test_m6anet_split)
555 |               ind_retry <- 1:length(test_m6anet_split)
556 |               while(any(unlist(lapply(tmp, is.null)))) {
557 |                 cat(sprintf("Starting new iteration for m6Anet; %d sites missing\n", length(which(unlist(lapply(tmp, is.null))))))
558 |                 tmp1 <- tmp1[ind_retry]
559 |                 tmp1 <- mclapply(test_m6anet_split[ind_retry], function(x) {
560 |                   tryCatch({
561 |                     coordinate_m6anet_unlisted <- unlist(transcriptToGenome(x, edb))
562 |                     return(coordinate_m6anet_unlisted)
563 |                   }, warning = function(w) {
564 |                     print("Warning")
565 |                     return(NULL)
566 |                   }, error = function(e) {
567 |                     print("Error")
568 |                     return(NULL)
569 |                   }
570 |                   )}, mc.cores = mc.cores)
571 |                 ind_retry <- names(which(unlist(lapply(tmp1, function(x) is.null(x)))))
572 |                 ind_ok <- names(which(unlist(lapply(tmp1, function(x) !is.null(x)))))
573 |                 tmp[ind_ok] <- tmp1[ind_ok]
574 |                 if (length(ind_retry) > 0) {
575 |                   tmp1 <- tmp1[ind_retry]
576 |                 }
577 |               }
578 |               
579 |               coordinate_m6anet_unlisted <- unlist(as(tmp, "GRangesList"))
580 |               #coordinate_m6anet_unlisted <- unlist(transcriptToGenome(test_m6anet, edb))
581 |               df_m6anet <- as.data.frame(unname(coordinate_m6anet_unlisted[,c(0,2,4,5)]))[,c(1:3,5,6,7,8)]
582 |               
583 |               tmp_rownames <- paste0(df_m6anet[, 6], "_", df_m6anet[, 7], "_", df_m6anet[, 5])
584 |               dup_names <- names(which(table(tmp_rownames) > 1))
585 |               ind_dup <- which(tmp_rownames %in% dup_names)
586 |               ind_dup_rm <- ind_dup[which(duplicated(df_m6anet[ind_dup, c(5,6,7)]))]
587 |               if (length(ind_dup_rm) > 0) {
588 |                 df_m6anet <- df_m6anet[-ind_dup_rm, ]
589 |               }
590 |               rownames(df_m6anet) <- paste0(df_m6anet[, 6], "_", df_m6anet[, 7], "_", df_m6anet[, 5])
591 |               
592 |               rownames(m6anet) <- paste0(m6anet[, 2], "_", m6anet[, 3], "_", m6anet[, 1])
593 |               
594 |               df_m6anet$Status <- m6anet[rownames(df_m6anet), 4]
595 |               df_m6anet$Prob_Mod <- m6anet[rownames(df_m6anet), 5]
596 |               df_m6anet_final <- df_m6anet[,c(1,2,3,4,8,9)]
597 |               colnames(df_m6anet_final) <- c("Chr", "Start", "End", "Strand", "Status", "Prob_mod")
598 |               write.table(df_m6anet_final, file = output_file, quote = F, sep = "\t", row.names = F)
599 |             }
600 |               else {message(paste0(tool,"'s output files don't exist."))}
601 |             }
602 |             if (!file.exists(output_file)) {
603 |               output_m6anet()
604 |             }
605 |             },
606 |             stop("Enter a valid tool as input!")
607 |     )}
608 | }
609 | 
610 | # Defining the parameters used to filter the results of those tools which give as output all the sites.
611 | rrach <- c("AAACA","AAACT","AAACC","AGACA","AGACT","AGACC","GAACA","GAACT","GAACC","GGACA","GGACT","GGACC") # the 12 RRACH 5-mers (R = A/G, H = A/C/T)
612 | tools <- c("dena", "drummer", "differr", "yanocomp", "nanocompore", "eligos", "mines"
613 |            , "epinanoErr", "epinanoSvm", "xpore", "nanodoc", "nanom6a", "tomboComparison", "m6anet")
614 | 
615 | pathTools <- c(pathdena, pathdrummer, pathdifferr, pathyanocomp, pathnanocompore, patheligos
616 |                , pathmines, pathepinanoError, pathepinanoSVM, pathxpore, pathnanodoc, pathnanom6a
617 |                , pathtomboComparison, pathm6anet)
618 | # Per-tool thresholds, listed in the same order as `tools`; NA where no cut-off is defined for that tool/preset.
619 | default <- c(0.1, 0.05, 0.05, 0.05, 0.01, 0.0001, NA, 0.1, 0.5, 0.05, 0.02, NA, 0.05, 0.9)
620 | relaxed <- c(0, NA, NA, NA, 1, 1, NA, NA, 0, 1, 0, NA, 1, 0)
621 | value <- rep(threshold, length(default))
622 | 
623 | names(pathTools) <- names(default) <- names(relaxed) <- names(value) <- tools
624 | 
625 | parameters_list <- list("default" = unname(default), "relaxed" = unname(relaxed), "value" = unname(value))
626 | # `threshold` selects a preset by name; any other value is converted to a number and applied to every tool.
627 | if (grepl(x = threshold, pattern = "default")) {
628 |   parameters <- parameters_list$default
629 | } else if (grepl(x = threshold, pattern = "relaxed")) {
630 |   parameters <- parameters_list$relaxed
631 | } else {
632 |   parameters <- as.numeric(parameters_list$value)
633 | }
634 | 
635 | # Data frame with one row per tool: its results sub-folder and its filtering threshold
636 | results_df <- data.frame(row.names = tools,
637 |                          path_folder = unname(pathTools),
638 |                          parameter = parameters)
639 | 
640 | # Looping through all the rows of the data frame, applying the output_processing function to each tool
641 | for (x in row.names(results_df)){output_processing(tool = x, 
642 |                                                    path_folder = paste0(path, "/", results_df[x, "path_folder"]), 
643 |                                                    output_file = paste0(resultsFolder, "/", x, "_", "output.bed"), 
644 |                                                    genome_gtf = genomegtf,
645 |                                                    genome_bed = genomebed,
646 |                                                    filtering_parameter = results_df[x, "parameter"])}
647 | 


--------------------------------------------------------------------------------
/pipeline.nf:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env nextflow
   2 | /*
   3 | ========================================================================================
   4 |                          mfurla/NanOlympicsMod
   5 | ========================================================================================
   6 |  mfurla/NanOlympicsMod analysis pipeline.
   7 |  #### Homepage / Documentation
   8 |  https://github.com/mfurla/NanOlympicsMod
   9 | ----------------------------------------------------------------------------------------
  10 | */
  11 | 
  12 | def helpMessage() { // Logs the usage text; option names must match the params.* names read by the pipeline
  13 |         log.info"""
  14 |     Usage:
  15 |     nextflow -c pipeline.conf run pipeline.nf --samples="/path/to/samples.txt" --resultsDir="/path/to/resultsDir"
  16 |     Mandatory arguments which may be specified in the pipeline.conf file
  17 | 
  18 |         --samples                                                Path to the tab-separated sample file including sample name, condition and path to base-called fast5 folder
  19 |         --test_condition                                         Condition that we are interested to profile (e. g. 'WT')
  20 |         --resultsDir                                             Path to a folder where to store results
  21 |         --fast5_slot                                             FAST5 slot containing the basecalled bases
  22 |         --fast5_slot_id                                          FAST5 slot containing the basecalled bases (redundant)
  23 |         --tombo_slot                                             FAST5 slot containing the resquiggled data
  24 |         --tombo_subslot                                          FAST5 slot containing the resquiggled data
  25 |         --transcriptome_fasta                                    Path to the transcriptome fasta file
  26 |         --transcriptome_fai                                      Path to the transcriptome fasta index file
  27 |         --genome_fasta                                           Path to the genome fasta file
  28 |         --genome_fai                                             Path to the genome fasta index file
  29 |         --genes2transcripts                                      Path to gene-to-transcripts file for Nanom6A
  30 |         --transcriptomebed                                       Path to transcripts bed12 file
  31 |         --genomebed                                              Path to genes bed file
  32 |         --gtf                                                    Path to genome annotation gtf file
  33 |         --nanom6AP                                               nanom6A probability thresholds for PR curve plotting
  34 |         --yanocompFDR                                            yanocomp FDR threshold
  35 |         --differrFDR                                             differr FDR threshold
  36 |         --drummerPval                                            drummer Pvalue threshold
  37 |         --epinanoErrorSumErr                                     epinanoError threshold sum of errors
  38 |         --epinanoErrorResiduals                                  epinanoError threshold residuals
  39 |         --postprocessingScript                                   Path to postprocessing R script
  40 |         --statisticalAnalysis                                    Path to statistical_analysis R script
  41 |         --binLength                                              Size of windows for genome binning
  42 |         --threshold                                              Set of thresholds to use for the filtering of m6A sites (choose between 'default' and 'relaxed')
  43 |         --peaksfile                                              Path to bed file with set of m6A gold-standard peaks
  44 |     """.stripIndent()
  45 | }
  46 | 
  47 | // Show help message
  48 | if (params.help) {
  49 |     helpMessage()
  50 |     exit 0
  51 | }
  52 | 
  53 | // Input of sample names, conditions, and FAST5s path.
  54 | // The tab-separated sample sheet (columns SampleName, Condition, DataPath) is
  55 | // parsed once and every (SampleName, Condition, DataPath) tuple is emitted on
  56 | // both channels, instead of reading and parsing the same file twice.
  57 | Channel
  58 |     .fromPath( params.samples )
  59 |     .splitCsv(header: true, sep:'\t')
  60 |     .map{ row-> tuple(row.SampleName, row.Condition, file(row.DataPath)) }
  61 |     .into{multi2single_annot;tombo_annot}
  62 | 
  63 | // Reference files. Each file is duplicated with .into{} into the channels listed;
  64 | // checkIfExists:true makes the run fail at start-up when a path does not exist.
  65 | 
  66 | // Input of transcriptome fasta.
  67 | Channel
  68 |     .fromPath(params.transcriptome_fasta, checkIfExists:true)
  69 |     .into{transcriptome_fasta_minimap2;transcriptome_fasta_mines;transcriptome_fasta_nanom6a;transcriptome_fasta_differr;transcriptome_fasta_tombo1;transcriptome_fasta_tombo2;transcriptome_fasta_tombo3;transcriptome_fasta_nanopolish;transcriptome_fasta_dena;transcriptome_fasta_epinanoSVM;transcriptome_fasta_epinanoError;transcriptome_fasta_nanopolish1;transcriptome_fasta_xpore;transcriptome_fasta_nanocompore2}
  70 | 
  71 | // Input of transcriptome fasta index.
  72 | Channel
  73 |     .fromPath(params.transcriptome_fai, checkIfExists:true)
  74 |     .into{transcriptome_fai_minimap2;transcriptome_fai_mines;transcriptome_fai_nanom6a;transcriptome_fai_differr;transcriptome_fai_tombo1;transcriptome_fai_tombo2;transcriptome_fai_tombo3;transcriptome_fai_nanopolish;transcriptome_fai_dena;transcriptome_fai_epinanoSVM;transcriptome_fai_epinanoError;transcriptome_fai_nanopolish1;transcriptome_fai_xpore;transcriptome_fai_nanocompore2}
  75 | 
  76 | // Input of genome fasta.
  77 | Channel
  78 |     .fromPath(params.genome_fasta, checkIfExists:true)
  79 |     .into{genome_fasta_minimap2;genome_fasta_nanom6a;genome_fasta_differr;genome_fasta_eligos;genome_fasta_nanopolish2;genome_fasta_epinanoSVM;genome_fasta_epinanoError;genome_fasta_nanodoc;genome_fasta_drummer;genome_fasta_nanopolish1;genome_fasta_mines}
  80 | 
  81 | // Input of genome fasta index.
  82 | Channel
  83 |     .fromPath(params.genome_fai, checkIfExists:true)
  84 |     .into{genome_fai_minimap2;genome_fai_nanom6a;genome_fai_differr;genome_fai_eligos;genome_fai_nanopolish2;genome_fai_epinanoSVM;genome_fai_epinanoError;genome_fai_nanodoc;genome_fai_drummer;genome_fai_nanopolish1;genome_fai_mines}
  85 | 
  86 | // Input of genome bed.
  87 | Channel
  88 |     .fromPath(params.genomebed, checkIfExists:true)
  89 |     .into{bed_eligos;bed_nanodoc}
  90 | 
  91 | // Input of transcriptome bed.
  92 | Channel
  93 |     .fromPath(params.transcriptomebed, checkIfExists:true)
  94 |     .into{bed_nanocompore}
  95 | 
  96 | // Input of genome gtf.
  97 | Channel
  98 |     .fromPath(params.gtf, checkIfExists:true)
  99 |     .into{gtf_xpore;gtf_yanocomp}
 100 | 
 101 | // From multiple read FAST5s to single read FAST5s.
 102 | process multi2single {
 103 |     // Fills <resultsDir>/<condition>/<sample>/FAST5/ with the FAST5 files of one sample:
 104 |     // multi-read files are split with multi_to_single_fast5 when params.multi2single is set,
 105 |     // otherwise the .fast5 files found under the input folder are copied as they are into FAST5/0/.
 106 |     input:
 107 |         tuple val(sample), val(condition), val(fast5) from multi2single_annot
 108 | 
 109 |     output:
 110 |         tuple val(sample), val(condition) into singleReadFAST5_fastq
 111 |         tuple val(condition), val(sample) into singleReadFAST5_tombo1
 112 |         tuple val(condition), val(sample) into singleReadFAST5_nanodoc
 113 | 
 114 |     script:
 115 |     if(params.multi2single)
 116 |     """
 117 |         # mkdir -p creates the missing parent folders as well
 118 |         mkdir -p ${params.resultsDir}/${condition}/${sample}/FAST5/
 119 | 
 120 |         multi_to_single_fast5 --recursive --threads ${task.cpus} --input_path ${fast5} --save_path ${params.resultsDir}/${condition}/${sample}/FAST5/
 121 |     """
 122 |     else
 123 |     """
 124 |         mkdir -p ${params.resultsDir}/${condition}/${sample}/FAST5/0/
 125 | 
 126 |         # Stop with an explicit message when the input folder holds no .fast5 file
 127 |         if [ -z "\$(find "${fast5}" ! -type d -name '*.fast5' | head -n 1)" ]; then
 128 |             echo "No .fast5 files found in ${fast5}" >&2
 129 |             exit 1
 130 |         fi
 131 | 
 132 |         # find hands every entry straight to cp, so paths containing blanks are not word-split;
 133 |         # -name matches the file name only (not any part of the path) and directories are skipped
 134 |         find "${fast5}" ! -type d -name '*.fast5' -exec cp {} ${params.resultsDir}/${condition}/${sample}/FAST5/0/ \\;
 135 | 
 136 |     """
 137 | }
 138 | 
 139 | // Extract FASTQs from single read FAST5s files.
 140 | process fastq { // Per-sample FASTQ extraction; only the symlink step runs when params.fastq is not set
 141 |     input:
 142 | 		tuple val(sample),val(condition) from singleReadFAST5_fastq
 143 |     // The tuple carries labels only: both script branches work on paths under params.resultsDir.
 144 |     output:
 145 | 		tuple val(sample), val(condition) into singleReadFASTQ
 146 |     // FAST5/ and its direct sub-folders (find -maxdepth 1) are each passed to poretools; output is appended to one file.
 147 |     script:
 148 |     if(params.fastq)
 149 |     """
 150 |     	mkdir -p ${params.resultsDir}/${condition}/${sample}/FASTQ/
 151 |         fast5_dir=\$(find ${params.resultsDir}/${condition}/${sample}/FAST5/ -maxdepth 1 -type d)
 152 |         if [ -f ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq ] ; then
 153 |           rm ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq
 154 |         fi
 155 |         for d in \$fast5_dir; do
 156 |     	  poretools fastq --group ${params.fast5_slot_id} \$d >> ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq
 157 |         done
 158 | 		mkdir -p ${params.resultsDir}/${condition}/FASTQ/
 159 | 		mkdir -p ${params.resultsDir}/${condition}/FASTQ/${sample}/
 160 | 		ln -sf ${params.resultsDir}/${condition}/${sample}/FASTQ/ ${params.resultsDir}/${condition}/FASTQ/${sample}/
 161 |     """
 162 | 	else
 163 | 	"""
 164 | 		mkdir -p ${params.resultsDir}/${condition}/FASTQ/
 165 | 		mkdir -p ${params.resultsDir}/${condition}/FASTQ/${sample}/
 166 | 		ln -sf ${params.resultsDir}/${condition}/${sample}/FASTQ/ ${params.resultsDir}/${condition}/FASTQ/${sample}/
 167 |     """
 168 | }
 169 | 
 170 | // Align each sample's FASTQ to the transcriptome (map-ont) and the genome (splice) with minimap2, then filter, sort and index the BAMs with samtools and copy them under params.resultsDir. When params.minimap2 is false, previously stored BAMs are linked instead.
 171 | process minimap2 {
 172 |     input:
 173 | 		tuple val(sample),val(condition) from singleReadFASTQ
 174 | 		// Reference FASTA files and their .fai indexes are paired with every sample through `each`.
 175 | 		each file('transcriptome.fa') from transcriptome_fasta_minimap2
 176 | 		each file('transcriptome.fa.fai') from transcriptome_fai_minimap2
 177 | 
 178 | 		each file('genome.fa') from genome_fasta_minimap2
 179 | 		each file('genome.fa.fai') from genome_fai_minimap2
 180 | 
 181 |     output:
 182 |     	tuple val(condition), val(sample) into minimap2_nanopolish1
 183 | 		tuple val(condition), file('minimap.filt.sortT.bam'), file('minimap.sortG.bam') into minimap2_minimap2Merge
 184 | 		// The remaining tuples carry the same BAMs plus their indexes to the differr, eligos and drummer channels.
 185 |     	tuple val(condition), file('minimap.filt.sortT.bam'), file('minimap.filt.sortT.bam.bai'), file('minimap.sortG.bam'), file('minimap.sortG.bam.bai') into minimap2_differr
 186 |     	tuple val(condition), file('minimap.sortG.bam'), file('minimap.sortG.bam.bai') into minimap2_eligos
 187 |     	tuple val(condition), file('minimap.filt.sortT.bam'), file('minimap.filt.sortT.bam.bai'), file('minimap.sortG.bam'), file('minimap.sortG.bam.bai') into minimap2_drummer
 188 | 		// -F 2324 drops unmapped (4), reverse-strand (16), secondary (256) and supplementary (2048) alignments; -F 2308 is the same mask without the strand bit. pipefail makes a failing minimap2 abort the task instead of leaving a truncated BAM behind.
 189 |     script:
 190 |     if(params.minimap2)
 191 |     """
 192 | 		set -o pipefail
 193 | 		/bin/minimap2/minimap2 -x map-ont -k14 -t ${task.cpus} -a transcriptome.fa ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq | samtools view -hSb | samtools sort -@ ${task.cpus} -o minimapT.bam
 194 | 		samtools view minimapT.bam -bh -t transcriptome.fa.fai -F 2324 | samtools sort -@ ${task.cpus} -o minimap.filt.sortT.bam
 195 | 		samtools index -@ ${task.cpus} minimap.filt.sortT.bam
 196 | 		mkdir -p ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/
 197 | 		cp minimapT.bam ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.bam
 198 | 		cp minimap.filt.sortT.bam ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam
 199 | 		cp minimap.filt.sortT.bam.bai ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam.bai
 200 | 
 201 | 		/bin/minimap2/minimap2 -ax splice -k14 -t ${task.cpus} genome.fa ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq | samtools view -hSb | samtools sort -@ ${task.cpus} -o minimapG.bam
 202 | 		samtools view minimapG.bam -bh -t genome.fa.fai -F 2308 | samtools sort -@ ${task.cpus} -o minimap.sortG.bam
 203 | 		samtools index -@ ${task.cpus} minimap.sortG.bam
 204 | 
 205 | 		mkdir -p ${params.resultsDir}/${condition}/${sample}/genomeAlignment/
 206 | 		cp minimapG.bam ${params.resultsDir}/${condition}/${sample}/genomeAlignment/minimap.bam
 207 | 		cp minimap.sortG.bam ${params.resultsDir}/${condition}/${sample}/genomeAlignment/minimap.sort.bam
 208 | 		cp minimap.sortG.bam.bai ${params.resultsDir}/${condition}/${sample}/genomeAlignment/minimap.sort.bam.bai
 209 |     """
 210 | 	else
 211 | 	"""
 212 | 		ln -s ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam ./minimap.filt.sortT.bam
 213 | 		ln -s ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam.bai ./minimap.filt.sortT.bam.bai
 214 | 		
 215 | 		ln -s ${params.resultsDir}/${condition}/${sample}/genomeAlignment/minimap.sort.bam ./minimap.sortG.bam
 216 | 		ln -s ${params.resultsDir}/${condition}/${sample}/genomeAlignment/minimap.sort.bam.bai ./minimap.sortG.bam.bai
 217 |     """
 218 | }
 219 | 
 220 | // Group the per-sample alignment tuples by condition, then route the test condition and every other condition into separate channels.
 221 | ni_test_minimap2Merge = Channel.create()
 222 | ni_other_minimap2Merge = Channel.create()
 223 | minimap2_minimap2Merge.groupTuple(by: 0)
 224 | 	.choice(ni_test_minimap2Merge, ni_other_minimap2Merge) { grouped -> grouped[0] != params.test_condition ? 1 : 0 }
 225 | 
 226 | // Merge the per-sample BAMs of each condition into one sorted, indexed BAM per condition (transcriptome and genome) and concatenate the per-sample FASTQs; when params.minimap2Merge is false, only link the previously merged BAMs.
 227 | process minimap2Merge {
 228 |     input:
 229 | 	    tuple val('condition1'), file('minimap.filt.sortT.1.*.bam'), file('minimap.sortG.1.*.bam') from ni_test_minimap2Merge
 230 | 	    tuple val('condition2'), file('minimap.filt.sortT.2.*.bam'), file('minimap.sortG.2.*.bam') from ni_other_minimap2Merge
 231 |     // condition1 is bound from the test-condition channel, condition2 from the channel holding the other condition.
 232 |     output:
 233 | 		tuple file('minimap.filt.sort.1.bam'), file('minimap.filt.sort.1.bam.bai'), file('minimap.filt.sort.2.bam'), file('minimap.filt.sort.2.bam.bai') into minimap2Merge_dena
 234 | 		tuple val(condition1), file('minimap.filt.sort.1.bam'), file('minimap.filt.sort.1.bam.bai'), file('minimap.sort.1.bam'), file('minimap.sort.1.bam.bai') into minimap2Merge_1_epinanoSVM
 235 | 		tuple val(condition2), file('minimap.filt.sort.2.bam'), file('minimap.filt.sort.2.bam.bai'), file('minimap.sort.2.bam'), file('minimap.sort.2.bam.bai') into minimap2Merge_2_epinanoSVM
 236 | 		tuple val(condition1), file('minimap.filt.sort.1.bam'), file('minimap.filt.sort.1.bam.bai'), file('minimap.sort.1.bam'), file('minimap.sort.1.bam.bai') into minimap2Merge_1_epinanoError
 237 | 		tuple val(condition2), file('minimap.filt.sort.2.bam'), file('minimap.filt.sort.2.bam.bai'), file('minimap.sort.2.bam'), file('minimap.sort.2.bam.bai') into minimap2Merge_2_epinanoError
 238 | 		// The declared output files are symlinks into params.resultsDir, created at the end of either branch.
 239 |     script:
 240 |     if(params.minimap2Merge)
 241 |     """
 242 | 		cat ${params.resultsDir}/${condition1}/FASTQ/*/FASTQ/singleReadsFASTQ.fastq > ${params.resultsDir}/${condition1}/FASTQ/singleReadsFASTQ.fastq
 243 | 		cat ${params.resultsDir}/${condition2}/FASTQ/*/FASTQ/singleReadsFASTQ.fastq > ${params.resultsDir}/${condition2}/FASTQ/singleReadsFASTQ.fastq
 244 | 
 245 |     	mkdir -p ${params.resultsDir}/${condition1}/
 246 | 		mkdir -p ${params.resultsDir}/${condition1}/transcriptomeAlignment/
 247 | 		samtools merge -f ${params.resultsDir}/${condition1}/transcriptomeAlignment/minimap.filt.bam minimap.filt.sortT.1.*.bam
 248 | 		samtools sort -@ ${task.cpus} -o ${params.resultsDir}/${condition1}/transcriptomeAlignment/minimap.filt.sort.bam ${params.resultsDir}/${condition1}/transcriptomeAlignment/minimap.filt.bam
 249 | 		samtools index -@ ${task.cpus} ${params.resultsDir}/${condition1}/transcriptomeAlignment/minimap.filt.sort.bam
 250 | 
 251 | 		mkdir -p ${params.resultsDir}/${condition1}/genomeAlignment/
 252 | 		samtools merge -f ${params.resultsDir}/${condition1}/genomeAlignment/minimap.bam minimap.sortG.1.*.bam
 253 | 		samtools sort -@ ${task.cpus} -o ${params.resultsDir}/${condition1}/genomeAlignment/minimap.sort.bam ${params.resultsDir}/${condition1}/genomeAlignment/minimap.bam
 254 | 		samtools index -@ ${task.cpus} ${params.resultsDir}/${condition1}/genomeAlignment/minimap.sort.bam
 255 | 
 256 |     	mkdir -p ${params.resultsDir}/${condition2}/
 257 | 		mkdir -p ${params.resultsDir}/${condition2}/transcriptomeAlignment/
 258 | 		samtools merge -f ${params.resultsDir}/${condition2}/transcriptomeAlignment/minimap.filt.bam minimap.filt.sortT.2.*.bam
 259 | 		samtools sort -@ ${task.cpus} -o ${params.resultsDir}/${condition2}/transcriptomeAlignment/minimap.filt.sort.bam ${params.resultsDir}/${condition2}/transcriptomeAlignment/minimap.filt.bam
 260 | 		samtools index -@ ${task.cpus} ${params.resultsDir}/${condition2}/transcriptomeAlignment/minimap.filt.sort.bam
 261 | 
 262 | 		mkdir -p ${params.resultsDir}/${condition2}/genomeAlignment/
 263 | 		samtools merge -f ${params.resultsDir}/${condition2}/genomeAlignment/minimap.bam minimap.sortG.2.*.bam
 264 | 		samtools sort -@ ${task.cpus} -o ${params.resultsDir}/${condition2}/genomeAlignment/minimap.sort.bam ${params.resultsDir}/${condition2}/genomeAlignment/minimap.bam
 265 | 		samtools index -@ ${task.cpus} ${params.resultsDir}/${condition2}/genomeAlignment/minimap.sort.bam
 266 | 
 267 | 		ln -s ${params.resultsDir}/${condition1}/transcriptomeAlignment/minimap.filt.sort.bam minimap.filt.sort.1.bam
 268 | 		ln -s ${params.resultsDir}/${condition1}/transcriptomeAlignment/minimap.filt.sort.bam.bai minimap.filt.sort.1.bam.bai
 269 | 
 270 | 		ln -s ${params.resultsDir}/${condition2}/transcriptomeAlignment/minimap.filt.sort.bam minimap.filt.sort.2.bam
 271 | 		ln -s ${params.resultsDir}/${condition2}/transcriptomeAlignment/minimap.filt.sort.bam.bai minimap.filt.sort.2.bam.bai
 272 | 
 273 | 		ln -s ${params.resultsDir}/${condition1}/genomeAlignment/minimap.sort.bam minimap.sort.1.bam
 274 | 		ln -s ${params.resultsDir}/${condition1}/genomeAlignment/minimap.sort.bam.bai minimap.sort.1.bam.bai
 275 | 
 276 | 		ln -s ${params.resultsDir}/${condition2}/genomeAlignment/minimap.sort.bam minimap.sort.2.bam
 277 | 		ln -s ${params.resultsDir}/${condition2}/genomeAlignment/minimap.sort.bam.bai minimap.sort.2.bam.bai
 278 |     """
 279 | 	else
 280 | 	"""
 281 | 		ln -s ${params.resultsDir}/${condition1}/transcriptomeAlignment/minimap.filt.sort.bam minimap.filt.sort.1.bam
 282 | 		ln -s ${params.resultsDir}/${condition1}/transcriptomeAlignment/minimap.filt.sort.bam.bai minimap.filt.sort.1.bam.bai
 283 | 
 284 | 		ln -s ${params.resultsDir}/${condition2}/transcriptomeAlignment/minimap.filt.sort.bam minimap.filt.sort.2.bam
 285 | 		ln -s ${params.resultsDir}/${condition2}/transcriptomeAlignment/minimap.filt.sort.bam.bai minimap.filt.sort.2.bam.bai
 286 | 
 287 | 		ln -s ${params.resultsDir}/${condition1}/genomeAlignment/minimap.sort.bam minimap.sort.1.bam
 288 | 		ln -s ${params.resultsDir}/${condition1}/genomeAlignment/minimap.sort.bam.bai minimap.sort.1.bam.bai
 289 | 
 290 | 		ln -s ${params.resultsDir}/${condition2}/genomeAlignment/minimap.sort.bam minimap.sort.2.bam
 291 | 		ln -s ${params.resultsDir}/${condition2}/genomeAlignment/minimap.sort.bam.bai minimap.sort.2.bam.bai
 292 | 	"""
 293 | }
 294 | 
 295 | // Group the singleReadFAST5_tombo1 tuples by their first element, then route the test condition and every other condition into separate channels.
 296 | ni_test_tombo1 = Channel.create()
 297 | ni_other_tombo1 = Channel.create()
 298 | singleReadFAST5_tombo1.groupTuple(by: 0)
 299 | 	.choice(ni_test_tombo1, ni_other_tombo1) { grouped -> grouped[0] != params.test_condition ? 1 : 0 }
 300 | 
 301 | // Resquiggle the FAST5 reads of both conditions against the transcriptome with Tombo; when params.tombo1 is false the script only prints "Skipped".
 302 | process tombo1 {
 303 |     input:
 304 | 		tuple val('condition1'), val('samples') from ni_test_tombo1
 305 | 		tuple val('condition2'), val('samples') from ni_other_tombo1
 306 | 		// `samples` is not referenced by the script, which resquiggles each whole condition directory under params.resultsDir.
 307 | 		each file('transcriptome.fa') from transcriptome_fasta_tombo1
 308 | 		each file('transcriptome.fa.fai') from transcriptome_fai_tombo1
 309 | 		// The two condition names are re-emitted once per downstream channel (tombo2, tombo3, nanom6a, dena).
 310 |     output:
 311 | 		tuple val(condition1) into condition1_tombo1_tombo2
 312 | 		tuple val(condition2) into condition2_tombo1_tombo2
 313 | 
 314 | 		tuple val(condition1) into condition1_tombo1_tombo3
 315 | 		tuple val(condition2) into condition2_tombo1_tombo3
 316 | 
 317 | 		tuple val(condition1) into condition1_tombo1_nanom6a
 318 | 		tuple val(condition2) into condition2_tombo1_nanom6a
 319 | 
 320 | 		tuple val(condition1) into condition1_tombo1_dena
 321 | 		tuple val(condition2) into condition2_tombo1_dena
 322 | 
 323 |     script:
 324 |     if(params.tombo1)
 325 |     """
 326 | 		/bin/miniconda3/bin/tombo resquiggle ${params.resultsDir}/${condition1}/ --ignore-read-locks --overwrite --basecall-group ${params.fast5_slot} transcriptome.fa --processes ${task.cpus} --fit-global-scale --include-event-stdev --failed-reads-filename ${params.resultsDir}/${condition1}/failedReads.txt
 327 | 
 328 | 		/bin/miniconda3/bin/tombo resquiggle ${params.resultsDir}/${condition2}/ --ignore-read-locks --overwrite --basecall-group ${params.fast5_slot} transcriptome.fa --processes ${task.cpus} --fit-global-scale --include-event-stdev --failed-reads-filename ${params.resultsDir}/${condition2}/failedReads.txt
 329 |     """
 330 | 	else
 331 | 	"""
 332 | 		echo "Skipped"
 333 |     """
 334 | }
 335 | 
 336 | // RNA modification detection with Tombo de novo on condition1, followed by export of browser files; the condition2 commands are kept but disabled.
 337 | process tombo2 {
 338 |     input:
 339 | 	    tuple val('condition1') from condition1_tombo1_tombo2
 340 | 	    tuple val('condition2') from condition2_tombo1_tombo2
 341 | 
 342 | 		each file('transcriptome.fa') from transcriptome_fasta_tombo2
 343 | 		each file('transcriptome.fa.fai') from transcriptome_fai_tombo2
 344 |     // Condition names go on to the mines channels; 'flagtombo2' is a constant value emitted into tombo2_postprocessing.
 345 |     output:
 346 | 	    tuple val(condition1) into condition1_tombo2_mines
 347 | 	    tuple val(condition2) into condition2_tombo2_mines
 348 | 	    val('flagtombo2') into tombo2_postprocessing
 349 | 		// The disabled condition2 command below writes its per-read statistics under condition2, so re-enabling it cannot overwrite condition1 results.
 350 |     script:
 351 |     if(params.tombo2)
 352 |     """
 353 |     	mkdir -p ${params.resultsDir}/${condition1}/tomboDenovo
 354 | 
 355 | 		/bin/miniconda3/bin/tombo detect_modifications de_novo --per-read-statistics-basename ${params.resultsDir}/${condition1}/tomboDenovo/Per_read_Stats_Filename --fast5-basedirs ${params.resultsDir}/${condition1}/ --statistics-file-basename ${params.resultsDir}/${condition1}/tomboDenovo/Stats_Filename
 356 | 
 357 | 		/bin/miniconda3/bin/tombo text_output browser_files --fast5-basedirs ${params.resultsDir}/${condition1}/ --statistics-filename ${params.resultsDir}/${condition1}/tomboDenovo/Stats_Filename.tombo.stats --browser-file-basename ${params.resultsDir}/${condition1}/tomboDenovo/output_filename --file-types statistic fraction dampened_fraction coverage valid_coverage signal 
 358 | 
 359 | #    	mkdir -p ${params.resultsDir}/${condition2}/tomboDenovo
 360 | 
 361 | #		/bin/miniconda3/bin/tombo detect_modifications de_novo --per-read-statistics-basename ${params.resultsDir}/${condition2}/tomboDenovo/Per_read_Stats_Filename --fast5-basedirs ${params.resultsDir}/${condition2}/ --statistics-file-basename ${params.resultsDir}/${condition2}/tomboDenovo/Stats_Filename
 362 | 
 363 | #		/bin/miniconda3/bin/tombo text_output browser_files --fast5-basedirs ${params.resultsDir}/${condition2}/ --statistics-filename ${params.resultsDir}/${condition2}/tomboDenovo/Stats_Filename.tombo.stats --browser-file-basename ${params.resultsDir}/${condition2}/tomboDenovo/output_filename --file-types statistic fraction dampened_fraction coverage valid_coverage signal 
 364 |     """
 365 | 	else
 366 | 	"""
 367 | 		echo "Skipped"
 368 |     """
 369 | }
 370 | 
 371 | // RNA modification detection with Tombo level_sample_compare: condition1 FAST5s against condition2 as the alternate set, then export of the statistic browser file under params.resultsDir/tomboComparison/.
 372 | process tombo3 {
 373 |     input:
 374 | 	    tuple val('condition1') from condition1_tombo1_tombo3
 375 | 	    tuple val('condition2') from condition2_tombo1_tombo3
 376 | 		// Only the constant 'flagtombo3' value is emitted; results are written directly under params.resultsDir.
 377 |     output:
 378 |     	val('flagtombo3') into tombo3_postprocessing
 379 |     script:
 380 |     if(params.tombo3)
 381 |     """
 382 |     	mkdir -p ${params.resultsDir}/tomboComparison/
 383 | 
 384 |     	/bin/miniconda3/bin/tombo detect_modifications level_sample_compare \
 385 |     	--fast5-basedirs ${params.resultsDir}/${condition1}/ \
 386 | 		--alternate-fast5-basedirs ${params.resultsDir}/${condition2}/ \
 387 | 		--minimum-test-reads 50 \
 388 | 		--processes ${task.cpus} --statistics-file-basename ${params.resultsDir}/tomboComparison/sample.level_samp_comp_detect \
 389 | 		--store-p-value
 390 | 
 391 | 		/bin/miniconda3/bin/tombo text_output browser_files --statistics-filename ${params.resultsDir}/tomboComparison/sample.level_samp_comp_detect.tombo.stats \
 392 |     	--browser-file-basename ${params.resultsDir}/tomboComparison/sample.level_samp_comp_detect --file-types statistic
 393 |     """
 394 | 	else
 395 | 	"""
 396 |         echo "Skipped"
 397 |     """
 398 | }
 399 | 
 400 | // RNA modification detection with nanom6A on condition1 (the condition2 commands are kept but disabled). Picard sequence dictionaries are built in both branches because they are declared outputs of this process.
 401 | process nanom6a {
 402 |     input:
 403 | 		tuple val('condition1') from condition1_tombo1_nanom6a
 404 | 		tuple val('condition2') from condition2_tombo1_nanom6a
 405 | 
 406 | 		each file('genome.fa') from genome_fasta_nanom6a
 407 | 		each file('genome.fa.fai') from genome_fai_nanom6a
 408 | 
 409 | 		each file('transcriptome.fa') from transcriptome_fasta_nanom6a
 410 | 		each file('transcriptome.fa.fai') from transcriptome_fai_nanom6a
 411 | 		// The dictionaries are sent to the picard_epinanoSVM and picard_epinanoError channels; 'flagnanom6a' is a constant value.
 412 |     output:
 413 | 		tuple file('genome.dict'), file('transcriptome.dict') into picard_epinanoSVM
 414 | 		tuple file('genome.dict'), file('transcriptome.dict') into picard_epinanoError
 415 | 		val('flagnanom6a') into nanom6a_postprocessing
 416 |     // Both branches call Picard with the same -R/-O argument syntax.
 417 |     script:
 418 |     if(params.nanom6a)
 419 |     """
 420 |     	java -jar /picard/build/libs/picard.jar CreateSequenceDictionary -R genome.fa -O genome.dict
 421 | 		java -jar /picard/build/libs/picard.jar CreateSequenceDictionary -R transcriptome.fa -O transcriptome.dict
 422 | 
 423 | 		mkdir -p ${params.resultsDir}/${condition1}/nanom6a/
 424 | 
 425 | 		find ${params.resultsDir}/${condition1}/ -name "*.fast5" > ${params.resultsDir}/${condition1}/nanom6a/files.txt
 426 | 		/nanom6A_2021_10_22/bin/extract_raw_and_feature_fast --cpu=${task.cpus} --fl=${params.resultsDir}/${condition1}/nanom6a/files.txt -o ${params.resultsDir}/${condition1}/nanom6a/result --clip=10 --basecall_group ${params.tombo_slot} --basecall_subgroup ${params.tombo_subslot}
 427 | 
 428 | 		for prob in ${params.nanom6AP};do /nanom6A_2021_10_22/bin/predict_sites --cpu ${task.cpus} -i ${params.resultsDir}/${condition1}/nanom6a/result -o ${params.resultsDir}/${condition1}/nanom6a/result_final -r transcriptome.fa -g genome.fa -b ${params.genes2transcripts} --model /nanom6A_2021_10_22/bin/model/ --proba \$prob; done
 429 | 
 430 | #		mkdir -p ${params.resultsDir}/${condition2}/nanom6a/
 431 | 
 432 | #		find ${params.resultsDir}/${condition2}/ -name "*.fast5" > ${params.resultsDir}/${condition2}/nanom6a/files.txt
 433 | #		/nanom6A_2021_10_22/bin/extract_raw_and_feature_fast --cpu=${task.cpus} --fl=${params.resultsDir}/${condition2}/nanom6a/files.txt -o ${params.resultsDir}/${condition2}/nanom6a/result --clip=10 --basecall_group ${params.tombo_slot} --basecall_subgroup ${params.tombo_subslot}
 434 | 
 435 | #		for prob in ${params.nanom6AP};do /nanom6A_2021_10_22/bin/predict_sites --cpu ${task.cpus} -i ${params.resultsDir}/${condition2}/nanom6a/result -o ${params.resultsDir}/${condition2}/nanom6a/result_final -r transcriptome.fa -g genome.fa -b ${params.genes2transcripts} --model /nanom6A_2021_10_22/bin/model/ --proba \$prob; done
 436 |     """
 437 | 	else
 438 | 	"""
 439 |     	java -jar /picard/build/libs/picard.jar CreateSequenceDictionary -R genome.fa -O genome.dict
 440 | 		java -jar /picard/build/libs/picard.jar CreateSequenceDictionary -R transcriptome.fa -O transcriptome.dict
 441 |     """
 442 | }
 443 | 
 444 | // Group the differr alignment tuples by condition, then route the test condition and every other condition into separate channels.
 445 | ni_test = Channel.create()
 446 | ni_other = Channel.create()
 447 | minimap2_differr.groupTuple(by: 0)
 448 | 	.choice(ni_test, ni_other) { grouped -> grouped[0] != params.test_condition ? 1 : 0 }
 449 | 
 450 | // RNA modification detection with differr on the genome alignments: condition2 BAMs are passed with -a and condition1 (test condition) BAMs with -b; output goes to params.resultsDir/differr/differrOut.bed.
 451 | process differr {
 452 |     input:
 453 | 	    tuple val('condition1'), file('minimap.filt.sortT.1.*.bam'), file('minimap.filt.sortT.1.*.bam.bai'), file('minimap.sortG.1.*.bam'), file('minimap.sortG.1.*.bam.bai') from ni_test
 454 | 	    tuple val('condition2'), file('minimap.filt.sortT.2.*.bam'), file('minimap.filt.sortT.2.*.bam.bai'), file('minimap.sortG.2.*.bam'), file('minimap.sortG.2.*.bam.bai') from ni_other
 455 | 		// Transcriptome BAMs and FASTA are staged but the script below only uses the genome BAMs and genome.fa.
 456 | 		each file('genome.fa') from genome_fasta_differr
 457 | 		each file('genome.fa.fai') from genome_fai_differr
 458 | 
 459 | 		each file('transcriptome.fa') from transcriptome_fasta_differr
 460 | 		each file('transcriptome.fa.fai') from transcriptome_fai_differr
 461 | 
 462 |     output:
 463 |     	val('flagdifferr') into differr_postprocessing
 464 |     script:
 465 |     if(params.differr)
 466 |     """
 467 |     	mkdir -p ${params.resultsDir}/differr/
 468 | 
 469 | 		differr -p ${task.cpus} \
 470 | 		\$(for file in minimap.sortG.2*.bam; do echo -a \$file; done) \
 471 | 		\$(for file in minimap.sortG.1*.bam; do echo -b \$file; done) \
 472 | 		-r genome.fa -o ${params.resultsDir}/differr/differrOut.bed \
 473 | 		-f ${params.differrFDR}
 474 | 
 475 |     """
 476 | 	else
 477 | 	"""
 478 |         echo "Skipped"
 479 |     """
 480 | }
 481 | 
 482 | // Group the eligos alignment tuples by condition, then route the test condition and every other condition into separate channels.
 483 | ni_test_eligos = Channel.create()
 484 | ni_other_eligos = Channel.create()
 485 | minimap2_eligos.groupTuple(by: 0)
 486 | 	.choice(ni_test_eligos, ni_other_eligos) { grouped -> grouped[0] != params.test_condition ? 1 : 0 }
 487 | 
 488 | // RNA modification detection with ELIGOS2 pair_diff_mod on the genome alignments, run twice: once on replicate-merged BAMs (output in eligos/merged/) and once on the individual replicate BAMs (output in eligos/).
 489 | process eligos {
 490 |     input:
 491 | 	    tuple val('condition1'), file('minimap.sortG.1.*.bam'), file('minimap.sortG.1.*.bam.bai') from ni_test_eligos
 492 | 	    tuple val('condition2'), file('minimap.sortG.2.*.bam'), file('minimap.sortG.2.*.bam.bai') from ni_other_eligos
 493 | 		// condition1 BAMs are passed as -tbam and condition2 BAMs as -cbam in both runs.
 494 | 		each file('genome.fa') from genome_fasta_eligos
 495 | 		each file('genome.fa.fai') from genome_fai_eligos
 496 | 
 497 | 		each file('genome.bed') from bed_eligos
 498 | 
 499 |     output:
 500 |     	val('flageligos') into eligos_postprocessing
 501 |     script:
 502 |     if(params.eligos)
 503 |     """
 504 |     	# Merged replicates comparison
 505 |     	mkdir -p ${params.resultsDir}/eligos/merged/
 506 | 
 507 |     	samtools merge minimap.sortG.1.bam minimap.sortG.1.*.bam
 508 |     	samtools merge minimap.sortG.2.bam minimap.sortG.2.*.bam
 509 | 
 510 |     	samtools index -@ ${task.cpus} minimap.sortG.1.bam
 511 |     	samtools index -@ ${task.cpus} minimap.sortG.2.bam
 512 | 
 513 | 		/eligos2-v2.1.0/eligos2 pair_diff_mod -t ${task.cpus} \
 514 | 		-tbam minimap.sortG.1.bam \
 515 | 		-cbam minimap.sortG.2.bam \
 516 | 		-reg genome.bed \
 517 | 		-ref genome.fa \
 518 | 		-o ${params.resultsDir}/eligos/merged/
 519 | 
 520 |     	# Splitted replicates comparison
 521 |     	mkdir -p ${params.resultsDir}/eligos/
 522 | 
 523 | 		/eligos2-v2.1.0/eligos2 pair_diff_mod -t ${task.cpus} \
 524 | 		-tbam minimap.sortG.1.*.bam \
 525 | 		-cbam minimap.sortG.2.*.bam \
 526 | 		-reg genome.bed \
 527 | 		-ref genome.fa \
 528 | 		-o ${params.resultsDir}/eligos/
 529 |     """
 530 | 	else
 531 | 	"""
 532 |         echo "Skipped"
 533 |     """
 534 | }
 535 | 
 536 | // RNA modification detection with MINES on condition1, using the Tombo de novo browser files (fraction-modified wig converted to BED, plus the coverage bedgraph); the condition2 commands are kept but disabled.
 537 | process mines {
 538 |     input:
 539 | 		tuple val('condition1') from condition1_tombo2_mines
 540 | 		tuple val('condition2') from condition2_tombo2_mines
 541 | 		// Inputs come from the tombo2 channels; the script reads that step's files from <condition1>/tomboDenovo/ under params.resultsDir.
 542 | 		each file('transcriptome.fa') from transcriptome_fasta_mines
 543 | 		each file('transcriptome.fa.fai') from transcriptome_fai_mines
 544 | 
 545 |     output:
 546 |     	val('flagmines') into mines_postprocessing
 547 |     script:
 548 |     if(params.mines)
 549 |     """
 550 | 		mkdir -p ${params.resultsDir}/${condition1}/mines/
 551 | #		mkdir -p ${params.resultsDir}/${condition2}/mines/
 552 | 
 553 | 		wig2bed < ${params.resultsDir}/${condition1}/tomboDenovo/output_filename.fraction_modified_reads.plus.wig > ${params.resultsDir}/${condition1}/mines/output_filename.fraction_modified_reads.plus.wig.bed
 554 | 		
 555 | #		wig2bed < ${params.resultsDir}/${condition2}/tomboDenovo/output_filename.fraction_modified_reads.plus.wig > ${params.resultsDir}/${condition2}/mines/output_filename.fraction_modified_reads.plus.wig.bed
 556 | 
 557 | 		python3 /MINES/cDNA_MINES.py --fraction_modified ${params.resultsDir}/${condition1}/mines/output_filename.fraction_modified_reads.plus.wig.bed --coverage ${params.resultsDir}/${condition1}/tomboDenovo/output_filename.coverage.plus.bedgraph --output ${params.resultsDir}/${condition1}/mines/m6A_output_filename.bed --ref transcriptome.fa --kmer_models /MINES/Final_Models/names.txt
 558 | 		
 559 | #		python3 /MINES/cDNA_MINES.py --fraction_modified ${params.resultsDir}/${condition2}/mines/output_filename.fraction_modified_reads.plus.wig.bed --coverage ${params.resultsDir}/${condition2}/tomboDenovo/output_filename.coverage.plus.bedgraph --output ${params.resultsDir}/${condition2}/mines/m6A_output_filename.bed --ref transcriptome.fa --kmer_models /MINES/Final_Models/names.txt
 560 | 
 561 |    """
 562 | 	else
 563 | 	"""
 564 |         echo "Skipped"
 565 |     """
 566 | }
 567 | 
 568 | // RNA modification detection with DENA on condition1: list candidate RRACH positions, extract features from the FAST5s and the merged transcriptome BAM, then run the LSTM prediction; the condition2 commands are kept but disabled.
 569 | process dena {
 570 |     input:
 571 |     	tuple file('minimap.filt.sort.1.bam'), file('minimap.filt.sort.1.bam.bai'), file('minimap.filt.sort.2.bam'), file('minimap.filt.sort.2.bam.bai') from minimap2Merge_dena
 572 | 		// Condition names arrive from tombo1; the predict step reads FAST5s from the condition directory using the params.tombo_slot group.
 573 | 		tuple val('condition1') from condition1_tombo1_dena
 574 | 		tuple val('condition2') from condition2_tombo1_dena
 575 | 
 576 | 		each file('transcriptome.fa') from transcriptome_fasta_dena
 577 | 		each file('transcriptome.fa.fai') from transcriptome_fai_dena
 578 | 
 579 |     output:
 580 |     	val('flagdena') into dena_postprocessing
 581 |     script:
 582 |     if(params.dena)
 583 |     """
 584 | 		mkdir -p ${params.resultsDir}/${condition1}/dena/
 585 | 
 586 | 		python3 /DENA/step4_predict/LSTM_extract.py get_pos --fasta transcriptome.fa --motif 'RRACH' --output ${params.resultsDir}/${condition1}/dena/candidate_predict_pos.txt
 587 | 
 588 | 		python3 /DENA/step4_predict/LSTM_extract.py predict --fast5 ${params.resultsDir}/${condition1} --corr_grp ${params.tombo_slot} --bam minimap.filt.sort.1.bam --sites ${params.resultsDir}/${condition1}/dena/candidate_predict_pos.txt --label "dena_label" --windows 2 2 --processes ${task.cpus}
 589 | 
 590 | 		mv *_tmp ${params.resultsDir}/${condition1}/dena/
 591 | 
 592 | 		python3 /DENA/step4_predict/LSTM_predict.py -i ${params.resultsDir}/${condition1}/dena/ -m /DENA/denaModels/ -o ${params.resultsDir}/${condition1}/dena/ -p "dena_label"
 593 | 
 594 | 		# mkdir -p ${params.resultsDir}/${condition2}/dena/
 595 | 
 596 | 		# python3 /DENA/step4_predict/LSTM_extract.py get_pos --fasta transcriptome.fa --motif 'RRACH' --output ${params.resultsDir}/${condition2}/dena/candidate_predict_pos.txt
 597 | 
 598 | 		# python3 /DENA/step4_predict/LSTM_extract.py predict --fast5 ${params.resultsDir}/${condition2} --corr_grp ${params.tombo_slot} --bam minimap.filt.sort.2.bam --sites ${params.resultsDir}/${condition2}/dena/candidate_predict_pos.txt --label "dena_label" --windows 2 2 --processes ${task.cpus}
 599 | 
 600 | 		# mv *_tmp ${params.resultsDir}/${condition2}/dena/
 601 | 
 602 | 		# python3 /DENA/step4_predict/LSTM_predict.py -i ${params.resultsDir}/${condition2}/dena/ -m /DENA/denaModels/ -o ${params.resultsDir}/${condition2}/dena/ -p "dena_label"
 603 |     """
 604 | 	else
 605 | 	"""
 606 |         echo "Skipped"
 607 |     """
 608 | }
 609 | 
 610 | // RNA modification detection with EpiNano in SVM mode on the condition1 genome BAM: per-site variants, 5-mer sliding windows, then prediction with the rrach linear model, run per strand; the condition2 commands are kept but disabled.
 611 | process epinanoSVM {
 612 |     input:
 613 | 		tuple val('condition1'), file('minimap.filt.sort.1.bam'), file('minimap.filt.sort.1.bam.bai'), file('minimap.sort.1.bam'), file('minimap.sort.1.bam.bai') from minimap2Merge_1_epinanoSVM
 614 | 		tuple val('condition2'), file('minimap.filt.sort.2.bam'), file('minimap.filt.sort.2.bam.bai'), file('minimap.sort.2.bam'), file('minimap.sort.2.bam.bai') from minimap2Merge_2_epinanoSVM
 615 | 		// The Picard dictionaries from picard_epinanoSVM are staged next to the FASTA files as <name>.fa.dict.
 616 | 		each file('genome.fa') from genome_fasta_epinanoSVM
 617 | 		each file('genome.fa.fai') from genome_fai_epinanoSVM
 618 | 
 619 | 		each file('transcriptome.fa') from transcriptome_fasta_epinanoSVM
 620 | 		each file('transcriptome.fa.fai') from transcriptome_fai_epinanoSVM
 621 | 		// The strand checks below test whether the plus/minus SAM dumps are non-empty; the whole analysis is gated on plus-strand reads being present.
 622 | 		tuple file('genome.fa.dict'), file('transcriptome.fa.dict') from picard_epinanoSVM
 623 |     output:
 624 |     	val('flagepinanoSVM') into epinanoSVM_postprocessing
 625 |     script:
 626 |     if(params.epinanoSVM)
 627 |     """
 628 |     	mkdir -p ${params.resultsDir}/${condition1}/epinanoSVM/
 629 |         mkdir -p ${params.resultsDir}/${condition2}/epinanoSVM/
 630 | 
 631 |     	samtools view -F16 minimap.sort.1.bam > minimap.sort.1.plus.sam
 632 |         samtools view -f16 minimap.sort.1.bam > minimap.sort.1.minus.sam
 633 |         samtools view -F16 minimap.sort.2.bam > minimap.sort.2.plus.sam
 634 |         samtools view -f16 minimap.sort.2.bam > minimap.sort.2.minus.sam
 635 | 
 636 |         if [[ -s minimap.sort.1.plus.sam ]]; then
 637 |            if [[ -s minimap.sort.1.minus.sam ]]; then
 638 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Variants.py -n ${task.cpus} -T g -R genome.fa -b minimap.sort.1.bam -s /EpiNano-Epinano1.2.1/misc/sam2tsv.jar
 639 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Slide_Variants.py minimap.sort.1.plus_strand.per.site.csv 5
 640 | 	       /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Slide_Variants.py minimap.sort.1.minus_strand.per.site.csv 5
 641 | 	       /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Predict.py --model /EpiNano-Epinano1.2.1/models/rrach.q3.mis3.del3.linear.dump --predict minimap.sort.1.plus_strand.per.site.5mer.csv --columns 8,13,23 --out_prefix plus_mod_prediction
 642 | 	       /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Predict.py --model /EpiNano-Epinano1.2.1/models/rrach.q3.mis3.del3.linear.dump --predict minimap.sort.1.minus_strand.per.site.5mer.csv --columns 8,13,23 --out_prefix minus_mod_prediction
 643 | 
 644 | 	       mv minimap.sort.1.plus_strand.per.site.csv minimap.sort.1.minus_strand.per.site.csv minimap.sort.1.plus_strand.per.site.5mer.csv minimap.sort.1.minus_strand.per.site.5mer.csv plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv minus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv ${params.resultsDir}/${condition1}/epinanoSVM/
 645 | 
 646 |            else
 647 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Variants.py -n ${task.cpus} -R genome.fa -b minimap.sort.1.bam -s /EpiNano-Epinano1.2.1/misc/sam2tsv.jar
 648 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Slide_Variants.py minimap.sort.1.plus_strand.per.site.csv 5
 649 | 	       /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Predict.py --model /EpiNano-Epinano1.2.1/models/rrach.q3.mis3.del3.linear.dump --predict minimap.sort.1.plus_strand.per.site.5mer.csv --columns 8,13,23 --out_prefix plus_mod_prediction
 650 | 
 651 | 	       mv minimap.sort.1.plus_strand.per.site.csv minimap.sort.1.plus_strand.per.site.5mer.csv plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv  ${params.resultsDir}/${condition1}/epinanoSVM/
 652 |            fi
 653 |         else
 654 |           echo "No plus-strand reads mapped for minimap.sort.1.bam"
 655 |         fi
 656 | 
 657 | #        if [[ -s minimap.sort.2.plus.sam ]]; then
 658 | #           if [[ -s minimap.sort.2.minus.sam ]]; then
 659 | #               /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Variants.py -n ${task.cpus} -T g -R genome.fa -b minimap.sort.2.bam -s /EpiNano-Epinano1.2.1/misc/sam2tsv.jar
 660 | #               /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Slide_Variants.py minimap.sort.2.plus_strand.per.site.csv 5
 661 | #	       /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Slide_Variants.py minimap.sort.2.minus_strand.per.site.csv 5
 662 | #	       /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Predict.py --model /EpiNano-Epinano1.2.1/models/rrach.q3.mis3.del3.linear.dump --predict minimap.sort.2.plus_strand.per.site.5mer.csv --columns 8,13,23 --out_prefix plus_mod_prediction
 663 | #               /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Predict.py --model /EpiNano-Epinano1.2.1/models/rrach.q3.mis3.del3.linear.dump --predict minimap.sort.2.minus_strand.per.site.5mer.csv --columns 8,13,23 --out_prefix minus_mod_prediction
 664 | 
 665 | #	       mv minimap.sort.2.plus_strand.per.site.csv minimap.sort.2.minus_strand.per.site.csv minimap.sort.2.plus_strand.per.site.5mer.csv minimap.sort.2.minus_strand.per.site.5mer.csv plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv minus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv ${params.resultsDir}/${condition2}/epinanoSVM/
 666 | 
 667 |         #    else
 668 |         #        /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Variants.py -n ${task.cpus} -R genome.fa -b minimap.sort.2.bam -s /EpiNano-Epinano1.2.1/misc/sam2tsv.jar
 669 |         #        /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Slide_Variants.py minimap.sort.2.plus_strand.per.site.csv 5
 670 | 	       # /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Predict.py --model /EpiNano-Epinano1.2.1/models/rrach.q3.mis3.del3.linear.dump --predict minimap.sort.2.plus_strand.per.site.5mer.csv --columns 8,13,23 --out_prefix plus_mod_prediction
 671 | 
 672 | 	       # mv minimap.sort.2.plus_strand.per.site.csv minimap.sort.2.plus_strand.per.site.5mer.csv plus_mod_prediction.q3.mis3.del3.MODEL.rrach.q3.mis3.del3.linear.dump.csv  ${params.resultsDir}/${condition2}/epinanoSVM/
 673 |         #    fi
 674 |         # else
 675 |         #   echo "No plus-strand reads mapped for minimap.sort.2.bam"
 676 |         # fi
 677 | 
 678 |     """
 679 | 	else
 680 | 	"""
 681 |         echo "Skipped"
 682 |     """
 683 | }
 684 | 
 685 | // RNA modifications detection with EpiNano in Error mode: per-strand sum_err tables are built for both conditions and compared with Epinano_DiffErr.R
 686 | process epinanoError {
 687 |     input:
 688 | 		tuple val('condition1'), file('minimap.filt.sort.1.bam'), file('minimap.filt.sort.1.bam.bai'), file('minimap.sort.1.bam'), file('minimap.sort.1.bam.bai') from minimap2Merge_1_epinanoError
 689 | 		tuple val('condition2'), file('minimap.filt.sort.2.bam'), file('minimap.filt.sort.2.bam.bai'), file('minimap.sort.2.bam'), file('minimap.sort.2.bam.bai') from minimap2Merge_2_epinanoError
 690 | 		// Only minimap.sort.1.bam and minimap.sort.2.bam are named in the script; they are analysed against genome.fa
 691 | 		each file('genome.fa') from genome_fasta_epinanoError
 692 | 		each file('genome.fa.fai') from genome_fai_epinanoError
 693 | 		// The transcriptome files are staged but never named in the script below
 694 | 		each file('transcriptome.fa') from transcriptome_fasta_epinanoError
 695 | 		each file('transcriptome.fa.fai') from transcriptome_fai_epinanoError
 696 | 		// Sequence dictionaries staged next to the FASTA files (not named explicitly in the script)
 697 | 		tuple file('genome.fa.dict'), file('transcriptome.fa.dict') from picard_epinanoError
 698 |     output:
 699 |     	val('flagepinanoError') into epinanoError_postprocessing // constant flag consumed by the postprocessing process
 700 |     script: // the analysis runs only when params.epinanoError is true; otherwise the task just echoes "Skipped"
 701 |     if(params.epinanoError)
 702 |     """
 703 |     	mkdir -p ${params.resultsDir}/epinanoError/
 704 | 		mkdir -p ${params.resultsDir}/epinanoError/minus/
 705 | 		mkdir -p ${params.resultsDir}/epinanoError/plus/
 706 |         # Split each alignment by strand: -F16 drops reverse-strand records, -f16 keeps only reverse-strand records
 707 | 		samtools view -F16 minimap.sort.1.bam > minimap.sort.1.plus.sam
 708 |                 samtools view -f16 minimap.sort.1.bam > minimap.sort.1.minus.sam
 709 |                 samtools view -F16 minimap.sort.2.bam > minimap.sort.2.plus.sam
 710 |                 samtools view -f16 minimap.sort.2.bam > minimap.sort.2.minus.sam
 711 |         # Condition 1: Epinano_Variants runs only when plus-strand records exist; "--type g" and the minus summary are added when minus-strand records exist too
 712 |         if [[ -s minimap.sort.1.plus.sam ]]; then
 713 |            if [[ -s minimap.sort.1.minus.sam ]]; then
 714 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Variants.py -R genome.fa -b minimap.sort.1.bam -s /EpiNano-Epinano1.2.1/misc/sam2tsv.jar --type g -n ${task.cpus}
 715 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Epinano_sumErr.py --kmer 0 --file minimap.sort.1.plus*site.csv --out minimap.sort.1.plus.sumErrOut.csv
 716 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Epinano_sumErr.py --kmer 0 --file minimap.sort.1.minus*site.csv --out minimap.sort.1.minus.sumErrOut.csv
 717 |            else
 718 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Variants.py -R genome.fa -b minimap.sort.1.bam -s /EpiNano-Epinano1.2.1/misc/sam2tsv.jar -n ${task.cpus}
 719 |                /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Epinano_sumErr.py --kmer 0 --file minimap.sort.1.plus*site.csv --out minimap.sort.1.plus.sumErrOut.csv
 720 |            fi
 721 |         else
 722 |            echo "No reads mapped for minimap.sort.1.bam"
 723 |         fi
 724 |         # Condition 2: same procedure as for condition 1
 725 |         if [[ -s minimap.sort.2.plus.sam ]]; then
 726 |            if [[ -s minimap.sort.2.minus.sam ]]; then
 727 |              /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Variants.py -R genome.fa -b minimap.sort.2.bam -s /EpiNano-Epinano1.2.1/misc/sam2tsv.jar --type g -n ${task.cpus}
 728 |              /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Epinano_sumErr.py --kmer 0 --file minimap.sort.2.plus*site.csv --out minimap.sort.2.plus.sumErrOut.csv
 729 |              /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Epinano_sumErr.py --kmer 0 --file minimap.sort.2.minus*site.csv --out minimap.sort.2.minus.sumErrOut.csv
 730 |            else
 731 |              /usr/bin/python3 /EpiNano-Epinano1.2.1/Epinano_Variants.py -R genome.fa -b minimap.sort.2.bam -s /EpiNano-Epinano1.2.1/misc/sam2tsv.jar -n ${task.cpus}
 732 |              /usr/bin/python3 /EpiNano-Epinano1.2.1/misc/Epinano_sumErr.py --kmer 0 --file minimap.sort.2.plus*site.csv --out minimap.sort.2.plus.sumErrOut.csv
 733 |            fi
 734 |         else
 735 |            echo "No reads mapped for minimap.sort.2.bam"
 736 |         fi
 737 |         # Differential error (sum_err) between the two conditions, plus strand first and then minus strand;
 738 |         # a strand is processed only when both conditions produced a sumErrOut table for it
 739 |         if [[ -f minimap.sort.1.plus.sumErrOut.csv && -f minimap.sort.2.plus.sumErrOut.csv ]]; then
 740 |             /bin/miniconda3/bin/Rscript /EpiNano-Epinano1.2.1/Epinano_DiffErr.R -k minimap.sort.2.plus.sumErrOut.csv -w minimap.sort.1.plus.sumErrOut.csv -d ${params.epinanoErrorSumErr} -t 3 -p -o diffErr -f sum_err
 741 | 
 742 |             if [[ -f diffErr.delta-sum_err.prediction.csv ]]; then
 743 | 				mv *diffErr* ${params.resultsDir}/epinanoError/plus/
 744 | 			fi
 745 | 
 746 | 			mv *plus_strand.per.site.csv ${params.resultsDir}/epinanoError/plus/
 747 | 			mv *plus.sumErrOut.csv ${params.resultsDir}/epinanoError/plus/
 748 | 		fi
 749 | 
 750 | 		if [[ -f minimap.sort.1.minus.sumErrOut.csv && -f minimap.sort.2.minus.sumErrOut.csv ]]; then
 751 | 			/bin/miniconda3/bin/Rscript /EpiNano-Epinano1.2.1/Epinano_DiffErr.R -k minimap.sort.2.minus.sumErrOut.csv -w minimap.sort.1.minus.sumErrOut.csv -d ${params.epinanoErrorSumErr} -t 3 -p -o diffErr -f sum_err
 752 | 
 753 | 			if [[ -f diffErr.delta-sum_err.prediction.csv ]]; then
 754 | 				mv *diffErr* ${params.resultsDir}/epinanoError/minus/
 755 | 			fi
 756 | 
 757 | 			mv *minus_strand.per.site.csv ${params.resultsDir}/epinanoError/minus/
 758 | 			mv *minus.sumErrOut.csv ${params.resultsDir}/epinanoError/minus/
 759 | 		fi
 760 | 
 761 | 
 762 |     """
 763 | 	else
 764 | 	"""
 765 |         echo "Skipped"
 766 |     """
 767 | }
 768 | 
 769 | // Group the single-read FAST5 items by condition (tuple element 0), then route the test condition to ni_test_nanodoc and every other condition to ni_other_nanodoc.
 770 | ni_test_nanodoc = Channel.create()
 771 | ni_other_nanodoc = Channel.create()
 772 | singleReadFAST5_nanodoc.groupTuple(by: 0)
 773 | 	.choice(ni_test_nanodoc, ni_other_nanodoc) { grouped -> (grouped[0] == params.test_condition) ? 0 : 1 }
 774 | 
 775 | // RNA modifications detection with nanodoc: `formatfile` is run on each condition's results folder, then `analysis` is run once per genome.bed line (condition2 as -rraw, condition1 as -traw)
 776 | process nanodoc {
 777 |     input:
 778 | 	    tuple val('condition1'), val('samples') from ni_test_nanodoc
 779 | 	    tuple val('condition2'), val('samples') from ni_other_nanodoc // NOTE(review): both tuples bind the name 'samples'; the script never reads it
 780 | 		// Reference FASTA with its .fai index, plus the BED file whose lines drive the analysis loop
 781 | 		each file('genome.fa') from genome_fasta_nanodoc
 782 | 		each file('genome.fa.fai') from genome_fai_nanodoc
 783 | 		each file('genome.bed') from bed_nanodoc
 784 |     output:
 785 |     	val('flagnanodoc') into nanodoc_postprocessing // constant flag consumed by the postprocessing process
 786 |     script: // the tool runs only when params.nanodoc is true; otherwise the task just echoes "Skipped"
 787 |     if(params.nanodoc)
 788 |     """
 789 |     	mkdir -p ${params.resultsDir}/nanodoc/
 790 |     	mkdir -p ${params.resultsDir}/nanodoc/${condition1}_output/
 791 |     	mkdir -p ${params.resultsDir}/nanodoc/${condition2}_output/
 792 | 
 793 | 		/bin/miniconda3/bin/python /nanoDoc/src/nanoDoc.py formatfile -i ${params.resultsDir}/${condition1}/ -o . -r genome.fa -t ${task.cpus}
 794 |                 mv index.txt ${params.resultsDir}/nanodoc/${condition1}_output/
 795 |                 pq_c1=\$(find . -maxdepth 1| grep "\\.pq")
 796 | 		mv \$pq_c1 ${params.resultsDir}/nanodoc/${condition1}_output/
 797 |                 /bin/miniconda3/bin/python /nanoDoc/src/nanoDoc.py formatfile -i ${params.resultsDir}/${condition2}/ -o . -r genome.fa -t ${task.cpus}
 798 |                 mv index.txt ${params.resultsDir}/nanodoc/${condition2}_output/
 799 |                 pq_c2=\$(find . -maxdepth 1| grep "\\.pq")
 800 |                 mv \$pq_c2 ${params.resultsDir}/nanodoc/${condition2}_output/
 801 | 		cat genome.bed | while read line; do chr=\$(echo \$line | cut -d' ' -f1); start=\$(echo \$line | cut -d' ' -f2); end=\$(echo \$line | cut -d' ' -f3); /bin/miniconda3/bin/python /nanoDoc/src/nanoDoc.py analysis -w /nanoDoc/weight5mer/ -p /nanoDoc/param20.txt -r genome.fa -rraw ${params.resultsDir}/nanodoc/${condition2}_output/ -traw ${params.resultsDir}/nanodoc/${condition1}_output/ -chrom \$chr --start \$start --end \$end -o "nanoDoc_results_"\$chr"_"\$start"_"\$end".txt"; done
 802 | 
 803 | 		mv nanoDoc_results_*.txt ${params.resultsDir}/nanodoc/
 804 |     """
 805 | 	else
 806 | 	"""
 807 |         echo "Skipped"
 808 |     """
 809 | }
 810 | 
 811 | // Group the per-sample alignments by condition (tuple element 0), then route the test condition to ni_test_drummer and every other condition to ni_other_drummer.
 812 | ni_test_drummer = Channel.create()
 813 | ni_other_drummer = Channel.create()
 814 | minimap2_drummer.groupTuple(by: 0)
 815 | 	.choice(ni_test_drummer, ni_other_drummer) { grouped -> (grouped[0] == params.test_condition) ? 0 : 1 }
 816 | 
 817 | // RNA modifications detection with drummer: DRUMMER.py is run once per sequence name listed in params.genome_fai, with condition 2 as -t and condition 1 as -c
 818 | process drummer {
 819 |     input:
 820 | 	    tuple val('condition1'), file('minimap.filt.sortT.1.*.bam'), file('minimap.filt.sortT.1.*.bam.bai'), file('minimap.sortG.1.*.bam'), file('minimap.sortG.1.*.bam.bai') from ni_test_drummer
 821 | 	    tuple val('condition2'), file('minimap.filt.sortT.2.*.bam'), file('minimap.filt.sortT.2.*.bam.bai'), file('minimap.sortG.2.*.bam'), file('minimap.sortG.2.*.bam.bai') from ni_other_drummer
 822 | 	    // Only the minimap.sortG.* files are used by the script (moved into the results folder); the minimap.filt.sortT.* files are staged but unused here
 823 |     output:
 824 |     	val('flagdrummer') into drummer_postprocessing // constant flag consumed by the postprocessing process
 825 |     script: // the loop ends with "|| true", so a failing DRUMMER run does not fail the task; when params.drummer is false the task just echoes "Skipped"
 826 |     if(params.drummer)
 827 |     """
 828 | 		mkdir -p ${params.resultsDir}/drummer/
 829 | 
 830 | 		mkdir -p ${params.resultsDir}/drummer/DRUMMER
 831 | 		rm -rf ${params.resultsDir}/drummer/DRUMMER
 832 | 		
 833 | 		cut ${params.genome_fai} -f 1 > ${params.resultsDir}/drummer/chromosomes.txt
 834 | 
 835 | 		mv 	minimap.sortG*.bam ${params.resultsDir}/drummer/
 836 | 		mv 	minimap.sortG*.bai ${params.resultsDir}/drummer/
 837 | 
 838 |  		cd ${params.resultsDir}/drummer/
 839 | 		cp -r /DRUMMER .
 840 |         cd ${params.resultsDir}/drummer/DRUMMER/
 841 | 
 842 | 		while read -r line; 
 843 | 		do
 844 | 		  python3 DRUMMER.py -r ${params.genome_fasta} \
 845 | 		  -t ${params.resultsDir}/drummer/minimap.sortG.2.*.bam \
 846 | 		  -c ${params.resultsDir}/drummer/minimap.sortG.1.*.bam \
 847 | 		  -o ${params.resultsDir}/drummer/DRUMMER/\$line/ \
 848 | 		  -a exome \
 849 | 		  -p ${params.drummerPval} \
 850 | 		  -n \$line \
 851 |                   -m true ;
 852 | 		done < ${params.resultsDir}/drummer/chromosomes.txt || true
 853 |     """
 854 | 	else
 855 | 	"""
 856 |         echo "Skipped"
 857 |     """
 858 | }
 859 | 
 860 | // Resquiggling with nanopolish for each sample: index the reads, then run eventalign on the transcriptome alignment twice (read-name output and read-index output)
 861 | process nanopolish1 {
 862 |     input:
 863 | 		tuple val(condition), val(sample) from minimap2_nanopolish1
 864 | 		// Transcriptome reference used by the two active eventalign commands
 865 | 		each file('transcriptome.fa') from transcriptome_fasta_nanopolish1
 866 | 		each file('transcriptome.fa.fai') from transcriptome_fai_nanopolish1
 867 | 		// Genome reference: only referenced by the commented-out genome eventalign commands
 868 | 		each file('genome.fa') from genome_fasta_nanopolish1
 869 | 		each file('genome.fa.fai') from genome_fai_nanopolish1
 870 |     output:
 871 |     	tuple val(condition), val(sample) into nanopolish1_xpore
 872 |     	tuple val(condition), val(sample) into nanopolish1_nanocompore1
 873 |     	tuple val(condition), val(sample) into nanopolish1_yanocomp1
 874 |     	tuple val(condition), val(sample) into nanopolish1_m6anet1
 875 |     	// The same (condition, sample) pair is emitted on four channels, one per downstream formatting process
 876 |     script: // all paths are built from params.resultsDir/condition/sample; when params.nanopolish1 is false the task just echoes "Skipped"
 877 |     if(params.nanopolish1)
 878 |     """
 879 | 		mkdir -p ${params.resultsDir}/${condition}/${sample}/nanopolish/
 880 | 		#mkdir -p ${params.resultsDir}/${condition}/${sample}/nanopolish/genome/
 881 | 		mkdir -p ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/
 882 | 
 883 | 		/usr/bin/nanopolish/nanopolish index -d ${params.resultsDir}/${condition}/${sample}/FAST5/ ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq
 884 | 
 885 | 		#/usr/bin/nanopolish/nanopolish eventalign --reads ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq --bam ${params.resultsDir}/${condition}/${sample}/genomeAlignment/minimap.sort.bam --genome genome.fa --samples --signal-index --scale-events -n --summary ${params.resultsDir}/${condition}/${sample}/nanopolish/genome/summary.txt --threads ${task.cpus} > ${params.resultsDir}/${condition}/${sample}/nanopolish/genome/eventalign_readName.txt
 886 | 
 887 | 		#/usr/bin/nanopolish/nanopolish eventalign --reads ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq --bam ${params.resultsDir}/${condition}/${sample}/genomeAlignment/minimap.sort.bam --genome genome.fa --signal-index --scale-events --summary ${params.resultsDir}/${condition}/${sample}/nanopolish/genome/summary.txt --threads ${task.cpus} > ${params.resultsDir}/${condition}/${sample}/nanopolish/genome/eventalign_readIndex.txt
 888 | 
 889 | 		/usr/bin/nanopolish/nanopolish eventalign --reads ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq --bam ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam --genome transcriptome.fa --samples --signal-index --scale-events -n --summary ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/summary.txt --threads ${task.cpus} > ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/eventalign_readName.txt
 890 | 
 891 | 		/usr/bin/nanopolish/nanopolish eventalign --reads ${params.resultsDir}/${condition}/${sample}/FASTQ/singleReadsFASTQ.fastq --bam ${params.resultsDir}/${condition}/${sample}/transcriptomeAlignment/minimap.filt.sort.bam --genome transcriptome.fa --signal-index --scale-events --summary ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/summary.txt --threads ${task.cpus} > ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/eventalign_readIndex.txt
 892 | 
 893 |     """
 894 | 	else
 895 | 	"""
 896 |         echo "Skipped"
 897 |     """
 898 | }
 899 | 
 900 | // Data formatting for xpore for each sample: `xpore dataprep` on the read-index eventalign file written by nanopolish1
 901 | process xpore1 {
 902 |     input:
 903 |     	tuple val(condition), val(sample) from nanopolish1_xpore
 904 |     	each file('genome.gtf') from gtf_xpore
 905 | 		each file('transcriptome.fa') from transcriptome_fasta_xpore
 906 | 		each file('transcriptome.fa.fai') from transcriptome_fai_xpore
 907 | 		// genome.gtf and transcriptome.fa are passed to dataprep together with the --genome flag
 908 |     output:
 909 |     	tuple val(condition), val(sample) into xpore1_xpore2 // the pair is forwarded unchanged; results are written under params.resultsDir
 910 |     script: // gated by params.xpore (the same flag also gates xpore2); otherwise the task just echoes "Skipped"
 911 |     if(params.xpore)
 912 |     """
 913 | 
 914 |         mkdir -p ${params.resultsDir}/${condition}/${sample}/xpore/
 915 |         xpore dataprep --eventalign ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/eventalign_readIndex.txt --out_dir ${params.resultsDir}/${condition}/${sample}/xpore --gtf_or_gff genome.gtf --transcript_fasta transcriptome.fa --genome
 916 |     """
 917 | 	else
 918 | 	"""
 919 |         echo "Skipped"
 920 |     """
 921 | }
 922 | 
 923 | // Group the xpore1 outputs by condition (tuple element 0), then route the test condition to ni_test_xpore2 and every other condition to ni_other_xpore2.
 924 | ni_test_xpore2 = Channel.create()
 925 | ni_other_xpore2 = Channel.create()
 926 | xpore1_xpore2.groupTuple(by: 0)
 927 | 	.choice(ni_test_xpore2, ni_other_xpore2) { grouped -> (grouped[0] == params.test_condition) ? 0 : 1 }
 928 | 
 929 | // RNA modifications detection with xpore: writes xpore.yaml listing every xpore dataprep folder found per condition, then runs `xpore diffmod` and `xpore postprocessing`
 930 | process xpore2 {
 931 |     input:
 932 | 	    tuple val('condition1'), val('sample1') from ni_test_xpore2 // sample1 is not read by the script; folders are discovered with find
 933 | 	    tuple val('condition2'), val('sample2') from ni_other_xpore2 // sample2 is not read by the script either
 934 |     output:
 935 | 	    val('flagxpore') into xpore_postprocessing // constant flag consumed by the postprocessing process
 936 |     script: // gated by params.xpore; otherwise the task just echoes "Skipped"
 937 |     if(params.xpore)
 938 |     """
 939 |     	mkdir -p ${params.resultsDir}/xpore/
 940 | 
 941 |     	echo "data:" > ${params.resultsDir}/xpore/xpore.yaml
 942 |     	echo "    "${condition1}":" >> ${params.resultsDir}/xpore/xpore.yaml
 943 |         for file in \$(find ${params.resultsDir}/${condition1} -maxdepth 2 | grep "xpore"); do sn=\$(basename \$(dirname \$file)); sd=\$(dirname \$file); echo "      rep"\$sn": "\$sd"/xpore"; done >> ${params.resultsDir}/xpore/xpore.yaml
 944 |         echo "    "${condition2}":" >> ${params.resultsDir}/xpore/xpore.yaml
 945 |         for file in \$(find ${params.resultsDir}/${condition2} -maxdepth 2 | grep "xpore"); do sn=\$(basename \$(dirname \$file));  sd=\$(dirname \$file); echo "      rep"\$sn": "\$sd"/xpore"; done >> ${params.resultsDir}/xpore/xpore.yaml
 946 |         echo "" >> ${params.resultsDir}/xpore/xpore.yaml
 947 | 	echo "out: "${params.resultsDir}"/xpore" >> ${params.resultsDir}/xpore/xpore.yaml
 948 | 
 949 | 	xpore diffmod --config ${params.resultsDir}/xpore/xpore.yaml --n_processes ${task.cpus}
 950 | 
 951 | 	xpore postprocessing --diffmod_dir ${params.resultsDir}/xpore/
 952 |     """
 953 | 	else
 954 | 	"""
 955 |         echo "Skipped"
 956 |     """
 957 | }
 958 | 
 959 | // Data formatting for nanocompore for each sample: collapse the read-name eventalign file written by nanopolish1 with `nanocompore eventalign_collapse`
 960 | process nanocompore1 {
 961 |     input:
 962 | 	    tuple val('condition'), val('sample') from nanopolish1_nanocompore1
 963 | 	    // The collapsed table and its index are emitted as symlinks into the results folder, together with condition and sample
 964 |     output:
 965 |     	tuple val(condition), val(sample), file("out_eventalign_collapse.tsv"), file("out_eventalign_collapse.tsv.idx") into nanocompore1_nanocompore2
 966 |     	// When params.nanocompore1 is false nothing is computed: the files already under the results folder (presumably from an earlier run) are only re-linked
 967 |     script:
 968 |     if(params.nanocompore1)
 969 |     """
 970 |         mkdir -p ${params.resultsDir}/${condition}/${sample}/nanocompore/
 971 | 		nanocompore eventalign_collapse --eventalign ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/eventalign_readName.txt --nthreads ${task.cpus} --outpath ${params.resultsDir}/${condition}/${sample}/nanocompore/ --overwrite
 972 | 		# Link the collapsed table and its index into the task directory so they can be collected as outputs
 973 | 		ln -sf ${params.resultsDir}/${condition}/${sample}/nanocompore/out_eventalign_collapse.tsv out_eventalign_collapse.tsv
 974 | 		ln -sf ${params.resultsDir}/${condition}/${sample}/nanocompore/out_eventalign_collapse.tsv.idx out_eventalign_collapse.tsv.idx
 975 |     """
 976 | 	else
 977 | 	"""
 978 | 		ln -sf ${params.resultsDir}/${condition}/${sample}/nanocompore/out_eventalign_collapse.tsv out_eventalign_collapse.tsv
 979 | 		ln -sf ${params.resultsDir}/${condition}/${sample}/nanocompore/out_eventalign_collapse.tsv.idx out_eventalign_collapse.tsv.idx
 980 |     """
 981 | }
 982 | 
 983 | 
 984 | // Group the nanocompore1 outputs by condition (tuple element 0), then route the test condition to ni_test_nanocompore2 and every other condition to ni_other_nanocompore2.
 985 | ni_test_nanocompore2 = Channel.create()
 986 | ni_other_nanocompore2 = Channel.create()
 987 | nanocompore1_nanocompore2.groupTuple(by: 0)
 988 | 	.choice(ni_test_nanocompore2, ni_other_nanocompore2) { grouped -> (grouped[0] == params.test_condition) ? 0 : 1 }
 989 | 
 990 | // RNA modifications detection with nanocompore: `nanocompore sampcomp` on the collapsed eventalign tables of both conditions, each list passed comma-separated
 991 | process nanocompore2 {
 992 |     input:
 993 | 	    tuple val('condition1'), val('sample1'), file('out_eventalign_collapse.1.*.tsv'), file('out_eventalign_collapse.1.*.tsv.idx') from ni_test_nanocompore2
 994 | 	    tuple val('condition2'), val('sample2'), file('out_eventalign_collapse.2.*.tsv'), file('out_eventalign_collapse.2.*.tsv.idx') from ni_other_nanocompore2
 995 | 	    // The staged names carry .1. / .2. so the script can glob the files of each condition separately; sample1 and sample2 are not read by the script
 996 | 		each file('transcriptome.fa') from transcriptome_fasta_nanocompore2
 997 | 		each file('transcriptome.fa.fai') from transcriptome_fai_nanocompore2
 998 | 		each file('transcriptome.bed') from bed_nanocompore
 999 | 		// Results are written to params.resultsDir/nanocompore/; only a constant flag is emitted
1000 |     output:
1001 |     	val('flagnanocompore') into nanocompore_postprocessing
1002 |     script: // IFS is set to ',' so that the [*] array expansions join the file names with commas; gated by params.nanocompore2
1003 |     if(params.nanocompore2)
1004 |     """
1005 | 		IFS=','
1006 | 		f1=(out_eventalign_collapse.1.*.tsv)
1007 | 		f2=(out_eventalign_collapse.2.*.tsv)
1008 | 
1009 |     	mkdir -p ${params.resultsDir}/nanocompore/
1010 | 
1011 |     	nanocompore sampcomp --file_list1 "\${f1[*]}" --file_list2 "\${f2[*]}" \
1012 | 		--label1 ${condition1} \
1013 | 		--label2 ${condition2} \
1014 | 	    --fasta transcriptome.fa \
1015 | 	    --bed transcriptome.bed \
1016 |     	--outpath ${params.resultsDir}/nanocompore/ \
1017 |     	--allow_warnings \
1018 |     	--logit \
1019 |     	--nthreads ${task.cpus} \
1020 |     	--overwrite
1021 |     """
1022 | 	else
1023 | 	"""
1024 |         echo "Skipped"
1025 |     """
1026 | }
1027 | 
1028 | // Data formatting for m6anet for each sample: `m6anet-dataprep` on the read-index eventalign file written by nanopolish1
1029 | process m6anet1 {
1030 |     input:
1031 | 	    tuple val('condition'), val('sample') from nanopolish1_m6anet1
1032 | 	    // NOTE(review): the third output element below is an empty val(), while m6anet2 declares only two tuple elements for this channel - confirm what is meant to be emitted
1033 |     output:
1034 |     	tuple val(condition), val(sample), val() into m6anet1_m6anet2
1035 |     	// When params.m6anet1 is false the script only creates a symlink named m6anet to the existing results folder;
1036 |     	// NOTE(review): that symlink is not declared as an output of this process - confirm whether it is still needed
1037 |     script:
1038 |     if(params.m6anet1)
1039 |     """
1040 |         mkdir -p ${params.resultsDir}/${condition}/${sample}/m6anet/
1041 | 
1042 |         m6anet-dataprep --eventalign  ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/eventalign_readIndex.txt \
1043 |                 --out_dir ${params.resultsDir}/${condition}/${sample}/m6anet --n_processes ${task.cpus}
1044 |     """
1045 | 	else
1046 | 	"""
1047 | 		ln -sf ${params.resultsDir}/${condition}/${sample}/m6anet m6anet
1048 |     """
1049 | }
1050 | 
1051 | 
1052 | // Group the m6anet1 outputs by condition (tuple element 0), then route the test condition to ni_test_m6anet2 and every other condition to ni_other_m6anet2.
1053 | ni_test_m6anet2 = Channel.create()
1054 | ni_other_m6anet2 = Channel.create()
1055 | m6anet1_m6anet2.groupTuple(by: 0)
1056 | 	.choice(ni_test_m6anet2, ni_other_m6anet2) { grouped -> (grouped[0] == params.test_condition) ? 0 : 1 }
1057 | 
1058 | // RNA modifications detection with m6anet: `m6anet-run_inference` on every m6anet dataprep folder found for the test condition, then the gzipped result table is decompressed
1059 | process m6anet2 {
1060 |     input:
1061 | 	    tuple val('condition1'), val('sample1') from ni_test_m6anet2 // only the test-condition channel is consumed; sample1 is not read by the script
1062 | 	    // Results are written to params.resultsDir/m6anet; only a constant flag is emitted
1063 |     output:
1064 |     	val('flagm6anet') into m6anet_postprocessing
1065 |     script: // the dataprep folders are discovered with find (directories named m6anet, at most two levels below the condition folder); gated by params.m6anet2
1066 |     if(params.m6anet2)
1067 |     """
1068 |         mkdir -p ${params.resultsDir}/m6anet
1069 | 	preprocessing_dirs=\$(find ${params.resultsDir}/${condition1} -maxdepth 2 -type d | grep "m6anet\$")
1070 |         m6anet-run_inference --input_dir \$preprocessing_dirs --out_dir ${params.resultsDir}/m6anet --infer_mod_rate --n_processes ${task.cpus}
1071 | 	
1072 | 	    zcat ${params.resultsDir}/m6anet/data.result.csv.gz > ${params.resultsDir}/m6anet/data.result.csv
1073 |     """
1074 | 	else
1075 | 	"""
1076 |         echo "Skipped"
1077 |     """
1078 | }
1079 | 
1080 | // Data formatting for yanocomp for each sample: `yanocomp prep` is run twice on the same read-name eventalign file, once without and once with -g genome.gtf
1081 | process yanocomp1 {
1082 |     input:
1083 | 	    tuple val('condition'), val('sample') from nanopolish1_yanocomp1
1084 | 	    each file('genome.gtf') from gtf_yanocomp // only used by the second prep call (the one writing yanocomp/genome/output.hdf5)
1085 |     output:
1086 |     	tuple val(condition), file('outputT.hdf5'), file('outputG.hdf5') into yanocomp1_yanocomp2
1087 |     	// outputT.hdf5 / outputG.hdf5 are symlinks to the transcriptome / genome HDF5 files in the results folder; with params.yanocomp1 false they are only re-linked
1088 |     script:
1089 |     if(params.yanocomp1)
1090 |     """
1091 |     	mkdir -p ${params.resultsDir}/${condition}/${sample}/yanocomp/
1092 | 		mkdir -p ${params.resultsDir}/${condition}/${sample}/yanocomp/transcriptome/
1093 | 		mkdir -p ${params.resultsDir}/${condition}/${sample}/yanocomp/genome/
1094 | 
1095 | 		/bin/miniconda3/envs/yanocomp/bin/yanocomp prep -p ${task.cpus} -e ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/eventalign_readName.txt -h ${params.resultsDir}/${condition}/${sample}/yanocomp/transcriptome/output.hdf5
1096 |  
1097 | 		/bin/miniconda3/envs/yanocomp/bin/yanocomp prep -p ${task.cpus} -e ${params.resultsDir}/${condition}/${sample}/nanopolish/transcriptome/eventalign_readName.txt -h ${params.resultsDir}/${condition}/${sample}/yanocomp/genome/output.hdf5 -g genome.gtf
1098 | 
1099 | 		ln -s ${params.resultsDir}/${condition}/${sample}/yanocomp/transcriptome/output.hdf5 outputT.hdf5
1100 | 		ln -s ${params.resultsDir}/${condition}/${sample}/yanocomp/genome/output.hdf5 outputG.hdf5
1101 |     """
1102 | 	else
1103 | 	"""
1104 | 		ln -s ${params.resultsDir}/${condition}/${sample}/yanocomp/transcriptome/output.hdf5 outputT.hdf5
1105 | 		ln -s ${params.resultsDir}/${condition}/${sample}/yanocomp/genome/output.hdf5 outputG.hdf5
1106 |     """
1107 | }
1108 | 
1109 | // Group the yanocomp1 outputs by condition (tuple element 0), then route the test condition to ni_test_yanocomp2 and every other condition to ni_other_yanocomp2.
1110 | ni_test_yanocomp2 = Channel.create()
1111 | ni_other_yanocomp2 = Channel.create()
1112 | yanocomp1_yanocomp2.groupTuple(by: 0)
1113 | 	.choice(ni_test_yanocomp2, ni_other_yanocomp2) { grouped -> (grouped[0] == params.test_condition) ? 0 : 1 }
1114 | 
1115 | // RNA modifications detection with yanocomp: `yanocomp gmmtest` with the condition-1 genome HDF5 files as -c and the condition-2 genome HDF5 files as -t
1116 | process yanocomp2 {
1117 |     input:
1118 | 	    tuple val('condition1'), file('outputT.1.*.hdf5'), file('outputG.1.*.hdf5') from ni_test_yanocomp2
1119 | 	    tuple val('condition2'), file('outputT.2.*.hdf5'), file('outputG.2.*.hdf5') from ni_other_yanocomp2
1120 | 	    // Only the outputG.* files are passed to gmmtest; the outputT.* files are staged but unused here
1121 |     output:
1122 |     	val('flagyanocomp2') into yanocomp2_postprocessing // constant flag consumed by the postprocessing process
1123 |     script: // the two for-loops expand to one "-c file" / "-t file" pair per staged HDF5 file; gated by params.yanocomp2
1124 |     if(params.yanocomp2)
1125 |     """
1126 | 		mkdir -p ${params.resultsDir}/yanocomp/
1127 | 		
1128 | 		/bin/miniconda3/envs/yanocomp/bin/yanocomp gmmtest \
1129 | 		\$(for file in outputG.1.*.hdf5; do echo -c \$file; done) \
1130 | 		\$(for file in outputG.2.*.hdf5; do echo -t \$file; done) \
1131 | 		-p ${task.cpus} \
1132 | 		-o ${params.resultsDir}/yanocomp/yanocomp_output.bed \
1133 | 		-s ${params.resultsDir}/yanocomp/yanocomp_output.json.gzip \
1134 | 		-f ${params.yanocompFDR}
1135 |     """
1136 | 	else
1137 | 	"""
1138 |         echo "Skipped"
1139 |     """
1140 | }
1141 | 
1142 | // Processing of each tool's output to obtain bed files (params.postprocessingScript), followed by the statistical analysis script (params.statisticalAnalysis)
1143 | process postprocessing {
1144 |     input:
1145 | 		val('flagyanocomp2') from yanocomp2_postprocessing
1146 | 		val('flagdena') from dena_postprocessing
1147 | 		val('flagdrummer') from drummer_postprocessing
1148 | 		val('flagdifferr') from differr_postprocessing
1149 | 		val('flagnanom6a') from nanom6a_postprocessing
1150 | 		val('flagnanocompore') from nanocompore_postprocessing
1151 | 		val('flageligos') from eligos_postprocessing
1152 | 		val('flagmines') from mines_postprocessing
1153 | 		val('flagepinanoSVM') from epinanoSVM_postprocessing
1154 | 		val('flagepinanoError') from epinanoError_postprocessing
1155 | 		val('flagxpore') from xpore_postprocessing
1156 | 		val('flagnanodoc') from nanodoc_postprocessing
1157 | 		val('flagtombo2') from tombo2_postprocessing
1158 | 		val('flagtombo3') from tombo3_postprocessing
1159 | 		val('flagm6anet') from m6anet_postprocessing
1160 | 		// The flag values are not read by the script; they make this process wait for one item from every detection process
1161 |     output:
1162 |     // No output channel is declared: both scripts write under params.resultsDir (output_bed_files/ and output_statistical/)
1163 |     script:
1164 |     if(params.postprocessing)
1165 |     """
1166 | 	mkdir -p ${params.resultsDir}/output_bed_files/
1167 | 	mkdir -p ${params.resultsDir}/output_statistical/
1168 | 
1169 | 	Rscript ${params.postprocessingScript} path=${params.resultsDir} genomebed=${params.genesbed} genomegtf=${params.gtf} resultsFolder=${params.resultsDir}/output_bed_files/ mccores=${task.cpus} threshold=${params.threshold} pathdena=${params.test_condition}/dena pathdrummer=drummer pathdifferr=differr pathyanocomp=yanocomp pathmines=${params.test_condition}/mines pathnanocompore=nanocompore patheligos=eligos/merged pathepinanoError=epinanoError pathepinanoSVM=${params.test_condition}/epinanoSVM pathxpore=xpore pathnanodoc=nanodoc pathnanom6a=${params.test_condition}/nanom6a/result_final pathtomboComparison=tomboComparison pathm6anet=m6anet
1170 |         Rscript ${params.statisticalAnalysis} bed_folder=${params.resultsDir}/output_bed_files genomegtf=${params.gtf} genesbed=${params.genesbed} resultsFolder=${params.resultsDir}/output_statistical/ mccores=${task.cpus} peaks=${params.peaksfile} binLength=${params.binLength} genomefile=${params.genome_fasta}
1171 | 
1172 |     """
1173 | 	else
1174 | 	"""
1175 |         echo "Skipped"
1176 |     """
1177 | }
1178 | 


--------------------------------------------------------------------------------