├── docs
│   ├── Makefile
│   ├── issues.adoc
│   ├── description.adoc
│   ├── index.adoc
│   ├── install.adoc
│   ├── results.adoc
│   ├── options.adoc
│   ├── extra.adoc
│   ├── run.adoc
│   └── examples.adoc
├── bin
│   ├── len.py
│   ├── get_sizes.sh
│   ├── get_readqual.sh
│   ├── get_readstats.sh
│   ├── custom_uniprot_hits.R
│   ├── addAnnotation.py
│   ├── get_busco_val.sh
│   ├── heatmap_busco.R
│   ├── GO_plots.R
│   ├── SOS_busco.py
│   ├── busco_comparison.R
│   └── TransPi_Report_Ind.Rmd
├── .gitignore
├── remove_failed.sh
├── Dockerfile
├── conf
│   ├── test.config
│   └── busV4list.txt
├── LICENSE
├── README.md
├── template.nextflow.config
└── precheck_TransPi.sh
/docs/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | /opt/homebrew/bin/asciidoctor -D . index.adoc
3 |
--------------------------------------------------------------------------------
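The Makefile hardcodes the Homebrew path to `asciidoctor`, so it only works on a macOS machine with that exact install. A minimal sketch of building the manual on any system where `asciidoctor` is on the `PATH` (the `gem install` line assumes a working Ruby setup):

```bash
# Install asciidoctor if it is not already available (assumes Ruby/gem)
gem install asciidoctor

# Build the manual into the current directory, as the Makefile does
cd docs
asciidoctor -D . index.adoc
```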
/bin/len.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from Bio import SeqIO
4 | import sys
5 |
6 | # FASTA file to measure is the first command-line argument
7 | filename = sys.argv[1]
8 |
9 | # print one "<id><TAB><length>" line per record
10 | for record in SeqIO.parse(filename, "fasta"):
11 |     print(record.id, len(record.seq), sep="\t")
--------------------------------------------------------------------------------
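A minimal usage sketch for `len.py` (the FASTA filename is hypothetical); it requires Biopython and writes one tab-separated `id length` line per record to stdout:

```bash
# Assumes Biopython is installed in the active environment
python3 bin/len.py transcriptome.fasta >transcript_lengths.txt
```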
/.gitignore:
--------------------------------------------------------------------------------
1 | busco_db/
2 | diamonddb/
3 | hmmerdb/
4 | nextflow
5 | pipeline_info/
6 | reads/
7 | uniprot_db/
8 | work/
9 | cbs-dtu-tools/
10 | diamonddb_custom/
11 | diamonddb_swiss/
12 | nextflow.config
13 | results/
14 | sqlite_db/
15 | .varfile.sh
16 | .nextflow*
17 | .DS_Store
18 | cbs-dtu-tools.tar.gz
19 | Singularity
20 | Dockerfile
21 | evigene/
22 |
--------------------------------------------------------------------------------
/bin/get_sizes.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # keep entries <2500bp; summarize the rest as "data N" counter rows
3 | filename="$1"
4 | cut -f 1 -d "," "$filename" | awk '{print $3}' >nam
5 | cut -f 2 -d "," "$filename" | awk '{print $2}' >len
6 | paste nam len | awk '$2<2500 {print $0}' >lt_2500
7 | he=$( paste nam len | awk '$2>=2500 {print $0}' | wc -l )
8 | for x in $( seq 1 "$he" );do
9 |     echo data >>temp_1
10 |     echo "$x" >>temp_2
11 | done
12 | paste temp_1 temp_2 >he_2500
13 | cat lt_2500 he_2500 >final_sizes.txt
14 | rm nam len temp_1 temp_2 lt_2500 he_2500 "$filename"
--------------------------------------------------------------------------------
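A hedged invocation sketch for `get_sizes.sh` (the input filename is hypothetical). The script expects a comma-separated names/lengths file as its only argument and writes `final_sizes.txt`; note that it deletes the input file when it finishes:

```bash
# WARNING: the script removes its input file at the end
bash bin/get_sizes.sh SampleA_sizes.csv
head final_sizes.txt
```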
/remove_failed.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #Script to remove directories of FAILED and ABORTED processes in a nextflow pipeline
3 | #INPUT = filename_trace.txt
4 | file=$1
5 | if [ "$file" == "" ];then
6 | echo -e "\n\t Provide a trace file as input (e.g. filename_trace.txt)"
7 | echo -e "\n\t Usage: bash remove_failed.sh filename_trace.txt\n"
8 | exit 0
9 | else
10 | grep "ABORTED" "$file" >.erase.txt
11 | grep "FAILED" "$file" >>.erase.txt
12 | while read line;do
13 | a=$( echo $line | awk '{print $2}' )
14 | echo "$a"
15 | for d in work*/${a}*;do # glob loop handles multiple matching dirs
16 | [ -d "$d" ] && rm -rf "$d"
17 | done
18 | done <.erase.txt
19 | rm .erase.txt
20 | fi
21 |
--------------------------------------------------------------------------------
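Usage follows the script's own help message; run it from the pipeline launch directory so the `work*/` globs resolve (the trace filename is hypothetical):

```bash
# Remove work directories of FAILED/ABORTED tasks listed in the trace file
bash remove_failed.sh SampleA_trace.txt
```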
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM continuumio/miniconda3
2 |
3 | LABEL authors="Ramon Rivera-Vicens" \
4 | description="Docker image containing all requirements for TransPi pipeline" \
5 | version="1.0dev"
6 |
7 | RUN apt update; apt install -y gcc bc procps
8 |
9 | COPY transpi_env.yml /
10 | RUN conda env create -f /transpi_env.yml && conda clean -a
11 |
12 | ENV PATH /opt/conda/envs/TransPi/bin:$PATH
13 |
14 | RUN sed -i 's/base/TransPi/g' ~/.bashrc
15 |
16 | RUN wget http://arthropods.eugenes.org/EvidentialGene/other/evigene_older/evigene19may14.tar
17 | RUN tar -xf evigene19may14.tar && rm evigene19may14.tar
18 | ENV PATH /evigene/scripts/prot/:$PATH
19 |
20 | RUN mkdir -p /opt/conda/envs/TransPi/lib/python3.6/site-packages/bin && cp /opt/conda/envs/TransPi/bin/skip*.awk /opt/conda/envs/TransPi/lib/python3.6/site-packages/bin/
21 |
--------------------------------------------------------------------------------
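A minimal sketch for building and testing the image locally. The tag is arbitrary; the build context must contain `transpi_env.yml` (not shown in the tree above), since the Dockerfile COPYs it:

```bash
# Build from a directory containing the Dockerfile and transpi_env.yml
docker build -t transpi:1.0dev .

# Open a shell in the image to check the tools on the PATH
docker run --rm -it transpi:1.0dev bash
```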
/conf/test.config:
--------------------------------------------------------------------------------
1 | /*
2 | ========================================================================================
3 | Test Config File TransPi
4 | ========================================================================================
5 | Transcriptome Analysis Pipeline
6 | Author: Ramón E. Rivera-Vicéns
7 | ----------------------------------------------------------------------------------------
8 | */
9 |
10 | params {
11 | readsTest = [
12 | ['Sponge_sample', ['https://github.com/rivera10/test_dataset/raw/master/RNA_data/Tethya_wilhelma_R1.fastq.gz'], ['https://github.com/rivera10/test_dataset/raw/master/RNA_data/Tethya_wilhelma_R2.fastq.gz']]
13 | ]
14 | k="25,53"
15 | maxReadLen=100
16 | shortTransdecoder = true
17 | }
18 |
--------------------------------------------------------------------------------
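A hedged launch sketch for this config: the `test` profile supplies `readsTest`, `k`, and `maxReadLen`, so the command stays short (profiles are comma separated, as described in `docs/run.adoc`; exact flags may vary):

```bash
# Run the bundled sponge test dataset with conda
./nextflow run TransPi.nf --all -profile test,conda
```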
/docs/issues.adoc:
--------------------------------------------------------------------------------
1 | We tested TransPi using the following deployment methods:
2 |
3 | - conda = individual conda environments per process
4 |
5 | - docker = using TransPi container (i.e. -profile docker,TransPiContainer)
6 |
7 | - singularity = using TransPi container (i.e. -profile singularity,TransPiContainer)
8 |
9 |
10 | [NOTE]
11 | Using individual containers per process works for the majority of processes. However, we found a couple of issues with some containers (e.g. transabyss). We are working on a solution for these issues.
12 |
13 |
14 | = Reporting an issue
15 |
16 | If you find a problem or get an error please let us know by opening an issue in the repository.
17 |
18 |
19 | = Test dataset
20 |
21 | We include a `test` profile to try TransPi using a small dataset. However, this can create issues in some of the processes (e.g. contamination removal by psytrans).
22 |
--------------------------------------------------------------------------------
/bin/get_readqual.sh:
--------------------------------------------------------------------------------
1 | jfile="$1"
2 | sampleid=$( echo $jfile | cut -f 1 -d "." )
3 | r1bn=$( jq '.read1_before_filtering.quality_curves.mean' ${jfile} | grep -c "[0-9]" )
4 | r1bq=$( jq '.read1_before_filtering.quality_curves.mean' ${jfile} | grep "[0-9]" | tr -d "\n" | tr -d " " )
5 | r2bn=$( jq '.read2_before_filtering.quality_curves.mean' ${jfile} | grep -c "[0-9]" )
6 | r2bq=$( jq '.read2_before_filtering.quality_curves.mean' ${jfile} | grep "[0-9]" | tr -d "\n" | tr -d " " )
7 | r1an=$( jq '.read1_after_filtering.quality_curves.mean' ${jfile} | grep -c "[0-9]" )
8 | r1aq=$( jq '.read1_after_filtering.quality_curves.mean' ${jfile} | grep "[0-9]" | tr -d "\n" | tr -d " " )
9 | r2an=$( jq '.read2_after_filtering.quality_curves.mean' ${jfile} | grep -c "[0-9]" )
10 | r2aq=$( jq '.read2_after_filtering.quality_curves.mean' ${jfile} | grep "[0-9]" | tr -d "\n" | tr -d " " )
11 | echo -e "${r1bn}\n${r1bq}\n${r2bn}\n${r2bq}\n${r1an}\n${r1aq}\n${r2an}\n${r2aq}" >${sampleid}_reads_qual.csv
12 |
--------------------------------------------------------------------------------
/docs/description.adoc:
--------------------------------------------------------------------------------
1 | *TransPi – a comprehensive TRanscriptome ANalysiS PIpeline for de novo transcriptome assembly*
2 |
3 | TransPi provides a useful resource for the generation of de novo transcriptome assemblies,
4 | with minimal user input but without losing the power of a thorough analysis.
5 |
6 | * For more info see the https://doi.org/10.1101/2021.02.18.431773[Preprint]
7 |
8 | * Code available at https://www.github.com/palmuc/TransPi[GitHub]
9 |
10 | * Author: Ramón Rivera-Vicéns
11 | ** https://twitter.com/rerv787[Twitter]
12 |
13 |
14 | = Programs used
15 |
16 | * List of programs used by TransPi:
17 | ** FastQC
18 | ** fastp
19 | ** sortmerna
20 | ** rnaSPADES
21 | ** SOAP
22 | ** Trinity
23 | ** Velvet
24 | ** Oases
25 | ** TransABySS
26 | ** rnaQUAST
27 | ** EvidentialGene
28 | ** CD-Hit
29 | ** Exonerate
30 | ** Blast
31 | ** BUSCO
32 | ** Psytrans
33 | ** TransDecoder
34 | ** Trinotate
35 | ** Diamond
36 | ** Hmmer
37 | ** Bowtie2
38 | ** rnammer
39 | ** tmhmm
40 | ** signalP
41 | ** iPath
42 | ** SQLite
43 | ** R
44 | ** Python
45 |
46 | * Databases used by TransPi:
47 | ** Swissprot
48 | ** Uniprot custom database (e.g. all metazoan proteins)
49 | ** Pfam
50 |
--------------------------------------------------------------------------------
/bin/get_readstats.sh:
--------------------------------------------------------------------------------
1 | jfile="$1"
2 | sampleid=$( echo $jfile | cut -f 1 -d "." )
3 | tb=$( jq '.summary.before_filtering.total_reads' $jfile )
4 | r1b=$( jq '.read1_before_filtering.total_reads' $jfile )
5 | r1bl=$( jq '.summary.before_filtering.read1_mean_length' $jfile )
6 | r2b=$( jq '.read2_before_filtering.total_reads' $jfile )
7 | r2bl=$( jq '.summary.before_filtering.read2_mean_length' $jfile )
8 | ta=$( jq '.summary.after_filtering.total_reads' $jfile )
9 | r1a=$( jq '.read1_after_filtering.total_reads' $jfile )
10 | r1al=$( jq '.summary.after_filtering.read1_mean_length' $jfile )
11 | r2a=$( jq '.read2_after_filtering.total_reads' $jfile )
12 | r2al=$( jq '.summary.after_filtering.read2_mean_length' $jfile )
13 | loss=$( echo "${tb}-${ta}" | bc )
14 | gcb=$( jq '.summary.before_filtering.gc_content' $jfile )
15 | gca=$( jq '.summary.after_filtering.gc_content' $jfile )
16 | echo "Sample_name,Total_before,R1_before,R1_before_length,R2_before,R2_before_length,GC_before,Total_after,R1_after,R1_after_length,R2_after,R2_after_length,GC_after,Reads_discarded" >${sampleid}_reads_stats.csv
17 | echo "${sampleid},${tb},${r1b},${r1bl},${r2b},${r2bl},${gcb},${ta},${r1a},${r1al},${r2a},${r2al},${gca},${loss}" >>${sampleid}_reads_stats.csv
18 |
--------------------------------------------------------------------------------
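Both `get_readqual.sh` and `get_readstats.sh` take a fastp JSON report as their only argument and need `jq` on the `PATH`; the sample id is taken from the filename up to the first dot. A usage sketch with a hypothetical report name:

```bash
# Produces SampleA_reads_stats.csv and SampleA_reads_qual.csv
bash bin/get_readstats.sh SampleA.json
bash bin/get_readqual.sh SampleA.json
```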
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Molecular Geobiology and Paleobiology Lab
4 | Department of Earth and Environmental Sciences, Palaeontology & Geobiology, Ludwig-Maximilians-Universität München (LMU)
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/bin/custom_uniprot_hits.R:
--------------------------------------------------------------------------------
1 | args = commandArgs(trailingOnly=TRUE)
2 | sample_name=args[1]
3 |
4 | library(ggthemes)
5 | library(ggplot2)
6 |
7 | data=read.csv(paste(sample_name,"_custom_uniprot_hits.txt",sep=""),header=F)
8 |
9 | nlim=round((head(data$V1,n = 1)+450),digits = -2)
10 | p1<-ggplot(data=data, aes(x=reorder(data$V2,data$V1), y=data$V1))+
11 | geom_bar(stat="identity", fill="dark blue", width=.5)+
12 | coord_flip()+labs(x="UniProt Species",y="Number of Hits")+
13 | geom_text(aes(label=data$V1), position=position_dodge(width=0.3), vjust=0.25, hjust=-.10)+
14 | theme(axis.text=element_text(size=12))+ylim(0,nlim)+theme(axis.text.x=element_text(size=12,angle=0))+
15 | theme(axis.title=element_text(size=15,face="bold"))+ggtitle(paste(sample_name,"UniProt hits",sep=" "))+
16 | theme(plot.title = element_text(family="sans", colour = "black", size = rel(1.5)*1, face = "bold"))
17 |
18 |
19 | # not working in docker
20 | #ggsave(filename = paste(sample_name,"_custom_uniprot_hits.svg",sep=""),width = 15 ,height = 7)
21 | #ggsave(filename = paste(sample_name,"_custom_uniprot_hits.pdf",sep=""),width = 15 ,height = 7)
22 | pdf(paste(sample_name,"_custom_uniprot_hits.pdf",sep=""),width = 15 ,height = 7)
23 | print(p1)
24 | dev.off()
25 | svg(paste(sample_name,"_custom_uniprot_hits.svg",sep=""),width = 15 ,height = 7)
26 | print(p1)
27 | dev.off()
28 |
--------------------------------------------------------------------------------
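An invocation sketch: the script takes only the sample name (hypothetical here) and expects `<sample>_custom_uniprot_hits.txt` in the working directory, writing a PDF and an SVG next to it:

```bash
# Needs ggplot2 and ggthemes in the R library path
Rscript bin/custom_uniprot_hits.R SampleA
```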
/docs/index.adoc:
--------------------------------------------------------------------------------
1 | = TransPi Manual
2 | Ramón Rivera-Vicéns
3 | v1.1.0-rc, 2021-05-25
4 | :docinfo:
5 | :keywords: TransPi, transcriptome, assembly, annotation, Nextflow, pipeline
6 | :description: TransPi – a comprehensive TRanscriptome ANalysiS PIpeline for de novo transcriptome assembly
7 | :icons: font
8 | :toclevels: 3
9 | :imagesdir: img
10 | :toc: left
11 | :toc-title: TransPi Manual
12 | :source-highlighter: coderay
13 | :coderay-linenums-mode: table
14 | :sectnums:
15 | :sectlinks:
16 |
17 | How to use TransPi
18 |
19 | == Description
20 | :leveloffset: +2
21 |
22 | include::description.adoc[]
23 |
24 | :leveloffset: -2
25 |
26 | == Installing TransPi
27 | :leveloffset: +2
28 |
29 | include::install.adoc[]
30 |
31 | :leveloffset: -2
32 |
33 | == Running TransPi
34 | :leveloffset: +2
35 |
36 | include::run.adoc[]
37 |
38 | :leveloffset: -2
39 |
40 | == Results
41 | :leveloffset: +2
42 |
43 | include::results.adoc[]
44 |
45 | :leveloffset: -2
46 |
47 | == Additional options
48 | :leveloffset: +2
49 |
50 | include::options.adoc[]
51 |
52 | :leveloffset: -2
53 |
54 | == Examples
55 |
56 | :leveloffset: +2
57 |
58 | include::examples.adoc[]
59 |
60 | :leveloffset: -2
61 |
62 | == Extra information
63 | :leveloffset: +2
64 |
65 | include::extra.adoc[]
66 |
67 | :leveloffset: -2
68 |
69 | == Issues
70 | :leveloffset: +2
71 |
72 | include::issues.adoc[]
73 |
74 | :leveloffset: -2
75 |
--------------------------------------------------------------------------------
/docs/install.adoc:
--------------------------------------------------------------------------------
1 | = Requirements
2 |
3 | - System: Linux OS
4 |
5 | - Data type: Paired-end reads
6 |
7 | Example:
8 | IndA_R1.fastq.gz, IndA_R2.fastq.gz
9 |
10 | [NOTE]
11 | Make sure reads end with `_R1.fastq.gz` and `_R2.fastq.gz`.
12 | Multiple individuals can be run at the same time.
13 |
14 |
15 | = Downloading TransPi
16 |
17 | 1- Clone the repository
18 |
19 | [source,bash]
20 | ----
21 |
22 | git clone https://github.com/palmuc/TransPi.git
23 |
24 | ----
25 |
26 | 2- Move to the TransPi directory
27 |
28 | [source,bash]
29 | ----
30 |
31 | cd TransPi
32 |
33 | ----
34 |
35 | = Configuration
36 |
37 | TransPi requires various databases to run. The precheck script will install the databases and software, if necessary, to run the tool.
38 | The precheck run needs a `PATH` as an argument for installing (locally) all the databases the pipeline needs.
39 |
40 | [source,bash]
41 | ----
42 | bash precheck_TransPi.sh /YOUR/PATH/HERE/
43 | ----
44 |
45 |
46 | [NOTE]
47 | This process may take a while depending on the options you select. The step that takes longest is downloading, if desired, the entire set of metazoan proteins from UniProt (6GB).
48 | Other processes and databases are relatively fast, depending on your internet connection.
49 |
50 | Once the precheck run is done it will create a file named `nextflow.config` that contains the various `PATH` for the databases.
51 | If selected, it will also have the local conda environment `PATH`.
52 |
53 | The `nextflow.config` file also has other important parameters for pipeline execution that will be discussed further
54 | in the following sections.
55 |
--------------------------------------------------------------------------------
/bin/addAnnotation.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 | import glob
5 | import sys
6 | import argparse
7 |
8 | parser = argparse.ArgumentParser(usage='addAnnotation.py -trinotateFile FILENAME', description='')
9 | parser.add_argument('-trinotateFile', dest='trinotate_file', required=True)
10 | parser.add_argument('-db', dest='db_name', required=False, default="swissprot", help="DB to use for header: uniprot or swissprot")
11 | parser.add_argument('-type', dest='db_type', required=False, default="prot", help="Type of DB to use: nucl or prot")
12 | parser.add_argument('-combine', dest='db_combine', required=False, default="false", help='Use two DBs in headers')
13 | args = parser.parse_args()
14 |
15 | swissProtCount=0
16 | uniProtCount=0
17 | for line in open(args.trinotate_file, 'r'):
18 | line = line.strip()
19 | lineSplit = line.split("\t")
20 | if args.db_name == "swissprot" and args.db_type == "nucl":
21 | if lineSplit[2] != ".":
22 | print(">" + lineSplit[0] + " SwissProt_Blastx:" + lineSplit[2].split("^")[0])
23 | uniProtCount += 1
24 | else:
25 | print(">" + lineSplit[0] + " SwissProt_Blastx:" + "noHit")
26 | uniProtCount += 1
27 | elif args.db_name == "swissprot" and args.db_type == "prot":
28 | if lineSplit[6] != ".":
29 | print(">" + lineSplit[0] + " SwissProt_Blastp:" + lineSplit[6].split("^")[0])
30 | swissProtCount += 1
31 | else:
32 | print(">" + lineSplit[0] + " SwissProt_Blastp:" + "noHit")
33 | swissProtCount += 1
34 | elif args.db_name == "uniprot" and args.db_type == "nucl":
35 | if lineSplit[7] != ".":
36 | print(">" + lineSplit[0] + " UniProt_Blastx:" + lineSplit[7].split("^")[0])
37 | uniProtCount += 1
38 | else:
39 | print(">" + lineSplit[0] + " UniProt_Blastx:" + "noHit")
40 | uniProtCount += 1
41 | elif args.db_name == "uniprot" and args.db_type == "prot":
42 | if lineSplit[8] != ".":
43 | print(">" + lineSplit[0] + " UniProt_Blastp:" + lineSplit[8].split("^")[0])
44 | uniProtCount += 1
45 | else:
46 | print(">" + lineSplit[0] + " UniProt_Blastp:" + "noHit")
47 | uniProtCount += 1
48 |
--------------------------------------------------------------------------------
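A hedged usage sketch (the Trinotate report filename is hypothetical). The script prints the new FASTA-style headers to stdout, so redirect it to a file:

```bash
# Annotate headers with the SwissProt blastp hit column of the report
python3 bin/addAnnotation.py -trinotateFile SampleA.trinotate_annotation_report.xls \
    -db swissprot -type prot >SampleA_annotated_headers.txt
```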
/bin/get_busco_val.sh:
--------------------------------------------------------------------------------
1 | name_tri=$1
2 | name_transpi=$2
3 | version=$3
4 | a=$4
5 | # v3 and v4 BUSCO short summaries are parsed identically
6 | if [ "$version" == "v3" ] || [ "$version" == "v4" ];then
7 | #trinity
8 | for x in $name_tri;do
9 | echo "'${a}','${a}','${a}','${a}'," >>tspec.txt
10 | b=$( cat $x | grep "(C)" -A5 | awk '{print $1}' | awk -v RS= -v OFS=, '{$1 = $1} 1' | cut -f 2,3,4,5 -d "," )
11 | echo "${b}," >>tnum.txt
12 | c=$( cat $x | grep "C:" | cut -f 2 -d "[" | cut -f 1,2,3,4 -d "," | tr -d "%" | tr -d "]" | tr -d "[A-Z]" | tr -d ":" )
13 | echo "${c}," >>tperc.txt
14 | done
15 | #transpi
16 | for x in $name_transpi;do
17 | echo "'${a}_TP','${a}_TP','${a}_TP','${a}_TP'" >>pspec.txt
18 | b=$( cat $x | grep "(C)" -A5 | awk '{print $1}' | awk -v RS= -v OFS=, '{$1 = $1} 1' | cut -f 2,3,4,5 -d "," )
19 | echo "${b}" >>pnum.txt
20 | c=$( cat $x | grep "C:" | cut -f 2 -d "[" | cut -f 1,2,3,4 -d "," | tr -d "%" | tr -d "]" | tr -d "[A-Z]" | tr -d ":" )
21 | echo "${c}" >>pperc.txt
22 | done
23 | cat tspec.txt pspec.txt | tr "\t" "\n" | tr -d "\n" >final_spec
24 | cat tnum.txt pnum.txt | tr "\t" "\n" | tr -d "\n" >final_num
25 | cat tperc.txt pperc.txt | tr "\t" "\n" | tr -d "\n" >final_perc
26 | rm tnum.txt tperc.txt tspec.txt
27 | rm pnum.txt pperc.txt pspec.txt
28 | fi
29 |
--------------------------------------------------------------------------------
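An invocation sketch with hypothetical BUSCO short-summary filenames; the positional arguments are the Trinity summaries, the TransPi summaries, the BUSCO version (`v3` or `v4`), and a sample label. It writes `final_spec`, `final_num`, and `final_perc` for the comparison plot:

```bash
bash bin/get_busco_val.sh "short_summary_SampleA.Trinity.txt" \
    "short_summary_SampleA.TransPi.txt" v4 SampleA
```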
/docs/results.adoc:
--------------------------------------------------------------------------------
1 | = Directories
2 |
3 | == `results`
4 | After a successful run of TransPi the results are saved in a directory called `results`. This directory is divided into multiple directories for each major step of the pipeline.
5 |
6 | [NOTE]
7 | Directories will be created based on the options selected in the pipeline execution
8 |
9 | [horizontal]
10 | fastqc:: Fastqc html files
11 | filter:: Filter step html files
12 | rRNA_reads:: Info and reads of rRNA removal process
13 | normalization:: Normalized reads files
14 | saveReads:: Folder with reads saved from the filter and normalization processes
15 | assemblies:: All individual assemblies
16 | evigene:: Non-redundant final transcriptome (ends with name `.combined.okay.fa`)
17 | rnaQuast:: rnaQUAST output
18 | mapping:: Mapping results
19 | busco4:: BUSCO V4 results
20 | transdecoder:: Transdecoder results
21 | trinotate:: Annotation results
22 | report:: Interactive report of TransPi
23 | figures:: Figures created by TransPi (BUSCO comparison, Annotation, GO, etc)
24 | stats:: Basic stats from all steps of TransPi
25 | pipeline_info:: Nextflow report, trace file and others
26 | RUN_INFO.txt:: File with all versions of the tools used by TransPi. Also info from the run like command and PATH
27 |
28 | .NOTES
29 |
30 | ****
31 |
32 | - Name of output directory can be changed by using the `--outdir` parameter when executing the pipeline. Example `--outdir Results_SampleA`.
33 | - If multiple samples are run, each directory will have all files together but each one with a unique sample name.
34 |
35 |
36 | ****
37 |
38 | == `work`
39 |
40 | A directory called `work` is also created when running TransPi. It contains all the Nextflow working files, TransPi results and intermediate files.
41 |
42 | [NOTE]
43 | Directory `work` can be removed after the pipeline is done since all important files are stored in the `results` directory.
44 |
45 |
46 | = Figures
47 |
48 | TransPi produces multiple figures that are stored in the results directory.
49 |
50 | Example:
51 |
52 | image:https://sync.palmuc.org/index.php/s/kxetdGiNiSyHzrg/preview[UniProt,800,400,float="center", role="Uniprot"]
53 |
54 |
55 | = Report
56 |
57 | TransPi creates an interactive custom HTML report for easy data exploration.
58 |
59 | Report https://sync.palmuc.org/index.php/s/XCxeCNwAfParBHX[Sponge transcriptome]
60 |
61 | .NOTE
62 | ****
63 | - The example report here is a PDF file and not an HTML file. However, the original HTML file with interactive visualizations (i.e. as generated by TransPi) can be downloaded https://sync.palmuc.org/index.php/s/nP3TKPawmoX4xqL[here]
64 | ****
65 |
--------------------------------------------------------------------------------
/bin/heatmap_busco.R:
--------------------------------------------------------------------------------
1 | args = commandArgs(trailingOnly=TRUE)
2 | sample_name=args[1]
3 | comp_table=args[2]
4 | transpi_table=args[3]
5 |
6 | library(plotly)
7 | library(reshape2)
8 |
9 | # comparison table
10 | csv=read.csv(comp_table, header=TRUE, sep="\t")
11 |
12 | csv <- data.frame(lapply(csv, function(x) {gsub("Complete", "3", x)}))
13 | csv <- data.frame(lapply(csv, function(x) {gsub("Duplicated", "2", x)}))
14 | csv <- data.frame(lapply(csv, function(x) {gsub("Fragmented", "1", x)}))
15 | csv <- data.frame(lapply(csv, function(x) {gsub("Missing", "0", x)}))
16 | csv
17 | c=melt(csv,id.vars = 'Busco.ID')
18 | dec=c(0,.25,.25,.50,.50,.75,.75,1)
19 | my_colors <- c("#081D58","#081D58", "#2280B8","#2280B8", "#99D6B9", "#99D6B9","#f8f9fc","#f8f9fc")
20 | colz <- setNames(data.frame(dec, my_colors), NULL)
21 | fig <- plot_ly(c,x=~variable, y=~Busco.ID, z=~value, colorscale=colz, reversescale=T, type = "heatmap",
22 | colorbar=list(tickmode='array', tickvals=c(.35,1.1,1.87,2.60), thickness=30,
23 | ticktext= c("Missing","Fragmented","Duplicated","Complete"), len=0.4))
24 | fig <- fig %>% layout(xaxis=list(title="", showline = TRUE, mirror = TRUE),
25 | yaxis=list(title="BUSCO ID", tickmode="auto", nticks=length(csv$Busco.ID),
26 | tickfont=list(size=8), showline = TRUE, mirror = TRUE))
27 |
28 | orca(fig, paste(sample_name,"_all_missing_BUSCO.png",sep=""))
29 | orca(fig, paste(sample_name,"_all_missing_BUSCO.pdf",sep=""))
30 |
31 | # TransPi table
32 | csv=read.csv(transpi_table, header=TRUE, sep="\t")
33 |
34 | csv <- data.frame(lapply(csv, function(x) {gsub("Complete", "3", x)}))
35 | csv <- data.frame(lapply(csv, function(x) {gsub("Duplicated", "2", x)}))
36 | csv <- data.frame(lapply(csv, function(x) {gsub("Fragmented", "1", x)}))
37 | csv <- data.frame(lapply(csv, function(x) {gsub("Missing", "0", x)}))
38 | csv
39 | c=melt(csv,id.vars = 'Busco.ID')
40 | dec=c(0,.25,.25,.50,.50,.75,.75,1)
41 | my_colors <- c("#081D58","#081D58", "#2280B8","#2280B8", "#99D6B9", "#99D6B9","#f8f9fc","#f8f9fc")
42 | colz <- setNames(data.frame(dec, my_colors), NULL)
43 | fig <- plot_ly(c,x=~variable, y=~Busco.ID, z=~value, colorscale=colz, reversescale=T, type = "heatmap",
44 | colorbar=list(tickmode='array', tickvals=c(.35,1.1,1.87,2.60), thickness=30,
45 | ticktext= c("Missing","Fragmented","Duplicated","Complete"), len=0.4))
46 | fig <- fig %>% layout(xaxis=list(title="", showline = TRUE, mirror = TRUE),
47 | yaxis=list(title="BUSCO ID", tickmode="auto", nticks=length(csv$Busco.ID),
48 | tickfont=list(size=8), showline = TRUE, mirror = TRUE))
49 |
50 | orca(fig, paste(sample_name,"_TransPi_missing_BUSCO.png",sep=""))
51 | orca(fig, paste(sample_name,"_TransPi_missing_BUSCO.pdf",sep=""))
52 |
--------------------------------------------------------------------------------
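An invocation sketch: the two table arguments match the files written by `bin/SOS_busco.py` (`Complete_comparison_table` and `TransPi_comparison_table`); exporting the plotly heatmaps with `orca()` additionally requires the `orca` binary to be installed:

```bash
Rscript bin/heatmap_busco.R SampleA Complete_comparison_table TransPi_comparison_table
```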
/docs/options.adoc:
--------------------------------------------------------------------------------
1 | There are other parameters that can be changed when executing TransPi.
2 |
3 | = Output options
4 |
5 | [horizontal]
6 | `--outdir`::
7 | name of output directory. Example: `--outdir Sponges_150`.
8 | Default "results"
9 |
10 | `-w, -work`::
11 | name of working directory. Example: `-work Sponges_work`. Only one dash is needed for `-work` since it is a Nextflow option, not a TransPi parameter.
12 |
13 | `--tracedir`::
14 | Name for directory to save pipeline trace files.
15 | Default "pipeline_info"
16 |
17 | = Additional analyses
18 |
19 | [horizontal]
20 | `--rRNAfilter`:: Remove rRNA from sequences. Requires option --rRNAdb
21 |
22 | `--rRNAdb`:: PATH to database of rRNA sequences to use for filtering of rRNA. Default ""
23 |
24 | `--filterSpecies`::
25 | Perform psytrans filtering of transcriptome. Requires options `--host` and `--symbiont`
26 |
27 | `--host`:: Host (or similar) protein file.
28 |
29 | `--symbiont`:: Symbiont (or similar) protein file.
30 |
31 | `--psyval`:: Psytrans value to train model. Default "160"
32 |
33 | `--allBuscos`:: Run BUSCO analysis in all assemblies
34 |
35 | `--rescueBusco`:: Generate BUSCO distribution analysis
36 |
37 | `--minPerc`::
38 | Minimum percentage of assemblers required for the BUSCO distribution.
39 | Default ".70"
40 |
41 | `--shortTransdecoder`:: Run Transdecoder without the homology searches
42 |
43 | `--withSignalP`::
44 | Include SignalP for the annotation. Needs manual installation of CBS-DTU tools.
45 | Default "false"
46 |
47 | `--signalp`:: PATH to SignalP software. Default ""
48 |
49 | `--withTMHMM`::
50 | Include TMHMM for the annotation. Needs manual installation of CBS-DTU tools.
51 | Default "false"
52 |
53 | `--tmhmm`:: PATH to TMHMM software. Default ""
54 |
55 | `--withRnammer`::
56 | Include Rnammer for the annotation. Needs manual installation of CBS-DTU tools.
57 | Default "false"
58 |
59 | `--rnam`:: PATH to Rnammer software. Default ""
60 |
61 | = Skip options
62 |
63 | [horizontal]
64 | `--skipEvi`:: Skip EvidentialGene run in --onlyAsm option. Default "false"
65 |
66 | `--skipQC`:: Skip FastQC step. Default "false"
67 |
68 | `--skipFilter`:: Skip fastp filtering step. Default "false"
69 |
70 | `--skipKegg`:: Skip kegg analysis. Default "false"
71 |
72 | `--skipReport`:: Skip generation of final TransPi report. Default "false"
73 |
74 | = Other parameters
75 |
76 | [horizontal]
77 | `--minQual`:: Minimum quality score for fastp filtering. Default "25"
78 |
79 | `--pipeInstall`:: PATH to TransPi directory. Default "". If precheck is used this will be added to the nextflow.config automatically.
80 |
81 | `--envCacheDir`:: PATH for environment cache directory (either conda or containers). Default "Launch directory of pipeline"
82 |
--------------------------------------------------------------------------------
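A hedged example combining several of the options above in a single run (paths are placeholders):

```bash
nextflow run TransPi.nf --all --reads '/YOUR/READS/PATH/HERE/*_R[1,2].fastq.gz' \
    --k 25,41,53 --maxReadLen 75 -profile conda \
    --rRNAfilter --rRNAdb /YOUR/PATH/HERE/rRNA_database.fasta \
    --allBuscos --rescueBusco --minPerc .70 --outdir Results_SampleA
```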
/docs/extra.adoc:
--------------------------------------------------------------------------------
1 | Here are some notes that can help in the execution of TransPi, as well as some important considerations based on Nextflow settings.
2 | For more in detail information visit the https://www.nextflow.io/docs/latest/index.html[Nextflow documentation]
3 |
4 | = `-resume`
5 | If an error occurs and you need to resume the pipeline just include the `-resume` option when calling the pipeline.
6 |
7 | [source,bash]
8 | ----
9 | ./nextflow run TransPi.nf --onlyAnn -profile conda -resume
10 | ----
11 |
12 | = `template.nextflow.config`
13 |
14 | == Resources
15 | The `template.nextflow.config` file has different configurations for each program of the pipeline
16 | (e.g. some with many CPUs, others with few). You can modify this depending on the resources you have in your system.
17 |
18 | Example:
19 | [source,bash]
20 | ----
21 | process {
22 |     withLabel: big_cpus {
23 |         cpus='30'
24 |         memory='15 GB'
25 |     }
26 | }
27 | ----
28 | In this case, all the processes using the label `big_cpus` will use 30 CPUs. If your system only has 20 please modify these values accordingly to avoid errors.
29 |
30 | [NOTE]
31 | Setting the correct CPUs and RAM of your system is important because `nextflow` will start as many jobs as possible if the resources are available.
32 | If you are on a VM with 120 CPUs, `nextflow` will be able to start four processes with the label `big_cpus` at once.
33 |
34 | == Data
35 |
36 | The precheck is designed to create a new `nextflow.config`, containing the `PATH` to the databases, every time it is run.
37 | Values that do not change between analyses can be edited directly in the `template.nextflow.config`. This way you avoid making the same changes to the `nextflow.config` after every precheck run.
38 |
39 | Example: Modify the `template.nextflow.config` with your cluster info to avoid repeating these in the future.
40 |
41 | = Custom profiles
42 |
43 | We are using https://slurm.schedmd.com/documentation.html[SLURM] as our workload manager in our server.
44 | Thus we have custom profiles for the submission of jobs. For example our `nextflow.config` has the following lines in the `profiles` section.
45 |
46 |
47 | [source,text]
48 | ----
49 | profiles {
50 |     palmuc {
51 |         process {
52 |             executor='slurm'
53 |             clusterOptions='--clusters=inter --partition=bigNode --qos=low'
54 |         }
55 |     }
56 | }
57 | ----
58 | You can add your custom profiles depending on the settings of your system and the workload manager you use (e.g. SGE, PBS, etc).
59 |
60 | The line `clusterOptions` can be used to add any other option that you will usually use for your job submission.
61 |
62 | = Local nextflow
63 |
64 | To avoid calling the pipeline using `./nextflow ...` you can make the `nextflow` launcher executable (e.g. `chmod 777 nextflow`) and place it somewhere in your `PATH`. To run the pipeline you then just need:
65 |
66 | [source,bash]
67 | ----
68 |
69 | nextflow run TransPi.nf ...
70 |
71 | ----
72 |
73 | = Real Time Monitoring
74 | To monitor your pipeline remotely without connecting to the server via ssh use https://tower.nf/login[Nextflow Tower].
75 | Make an account with your email and follow their instructions. After this, you can run the pipeline with the `-with-tower` option and follow the execution
76 | of the processes live.
77 |
78 | [source,bash]
79 | ----
80 |
81 | nextflow run TransPi.nf --all -with-tower -profile conda
82 |
83 | ----
84 |
--------------------------------------------------------------------------------
/docs/run.adoc:
--------------------------------------------------------------------------------
1 | = Full analysis (`--all`)
2 |
3 | After the successful run of the precheck script you are set to run TransPi.
4 |
5 | We recommend running TransPi with the `--all` option, which performs the complete analysis, from raw read filtering to annotation.
6 | Other options are described below.
7 |
8 | To run the complete pipeline:
9 | [source,bash]
10 | ----
11 | nextflow run TransPi.nf --all --reads '/YOUR/READS/PATH/HERE/*_R[1,2].fastq.gz' \
12 | --k 25,41,53 --maxReadLen 75 -profile conda
13 |
14 | ----
15 |
16 | Argument explanations:
17 | [source,text]
18 | ----
19 | --all Run full TransPi analysis
20 | --reads PATH to the paired-end reads
21 | --k kmers list to use for the assemblies
22 | --maxReadLen Max read length in the library
23 | -profile       Deployment method to use (conda in this example)
24 | ----
25 |
26 | [CAUTION]
27 | --
28 | If you combine multiple libraries from the same individual to create a reference transcriptome, which will later be used in downstream analyses (e.g. Differential Expression),
29 | make sure the kmer list is based on the length of the shortest read library and the `maxReadLen` on the longest read length.
30 |
31 | Example: Combining reads of 100bp with 125bp
32 | [source,text]
33 | ****
34 | --k 25,41,53,61 --maxReadLen 125
35 | ****
36 | --
37 |
38 | [NOTE]
39 | --
40 | You can run multiple samples at the same time
41 | --
42 |
43 | = Other options
44 |
45 | == `--onlyAsm`
46 |
47 | Run only the Assemblies and EvidentialGene analysis.
48 |
49 | Example for `--onlyAsm`:
50 | [source,bash]
51 | ----
52 | nextflow run TransPi.nf --onlyAsm --reads '/home/rrivera/TransPi/reads/*_R[1,2].fastq.gz' \
53 | --k 25,41,53 --maxReadLen 75 -profile conda
54 |
55 | ----
56 |
57 | [NOTE]
58 | You can run multiple samples at the same time
59 |
60 | == `--onlyEvi`
61 |
62 | Run only the EvidentialGene analysis
63 |
64 | Example for `--onlyEvi`:
65 | [source,bash]
66 | ----
67 | nextflow run TransPi.nf --onlyEvi -profile conda
68 | ----
69 |
70 |
71 | [IMPORTANT]
72 | TransPi looks for a directory named `onlyEvi`. It expects one file per sample to perform the reduction. The file should have all the assemblies concatenated into one.
73 | [NOTE]
74 | You can run multiple samples at the same time
75 |
76 | == `--onlyAnn`
77 |
78 | Run only the Annotation analysis (starting from a final assembly)
79 |
80 | Example for `--onlyAnn`:
81 | [source,bash]
82 | ----
83 | nextflow run TransPi.nf --onlyAnn -profile conda
84 | ----
85 |
86 | [IMPORTANT]
87 | TransPi looks for a directory named `onlyAnn`. It expects one file per sample to perform the annotation.
88 | [NOTE]
89 | You can run multiple samples at the same time
90 |
91 | = Using `-profile`
92 |
93 | TransPi can also use docker, singularity, and individual conda installations (i.e. per process) to deploy the pipeline.
94 |
95 | [source,text]
96 | ----
97 | test Run TransPi with a test dataset
98 | conda               Run TransPi with conda (individual environment per process)
99 | docker              Run TransPi with individual docker containers per process
100 | singularity         Run TransPi with individual singularity containers per process
101 | TransPiContainer    Run TransPi with a single container with all the necessary tools
102 | ----
103 |
104 | [NOTE]
105 | --
106 | Multiple profiles can be specified (comma separated)
107 |
108 | [source,text]
109 | ****
110 | Example: `-profile test,singularity`
111 | ****
112 | --
113 |
114 | Refer to *Section 6* of this manual for further details on deployment of TransPi using other profiles.
115 |
--------------------------------------------------------------------------------
/bin/GO_plots.R:
--------------------------------------------------------------------------------
1 | args = commandArgs(trailingOnly=TRUE)
2 | sample_name=args[1]
3 |
4 | library(ggthemes)
5 | library(ggplot2)
6 |
7 | dataCC=read.delim("GO_cellular.txt", header = F, sep = "\t")
8 | dataMF=read.delim("GO_molecular.txt", header = F, sep = "\t")
9 | dataBP=read.delim("GO_biological.txt", header = F, sep = "\t")
10 |
11 | #CC
12 | nlim=round((head(dataCC$V1,n = 1)+150),digits = -2)
13 | p1<-ggplot(data=dataCC, aes(x=reorder(dataCC$V2,dataCC$V1), y=dataCC$V1))+
14 | geom_bar(stat="identity", fill="green", width=.5)+
15 | coord_flip()+labs(x="Classification",y="Number of Sequences")+
16 | geom_text(aes(label=dataCC$V1), position=position_dodge(width=0.7), vjust=-0.0005, hjust=-.15)+
17 | theme(axis.text=element_text(size=10))+ylim(0,nlim)+theme(text = element_text(size = 15))+
18 | theme(axis.text.x=element_text(size=12,angle=0))+theme(axis.title=element_text(size=15,face="bold"))+
19 | ggtitle(paste(sample_name,"Cellular Component GOs",sep=" "))+
20 | theme(plot.title = element_text(family="sans", colour = "black", size = rel(1.1)*1, face = "bold"))
21 |
22 | #ggsave(filename = paste(sample_name,"_Cellular_Component.svg",sep=""),width = 15 ,height = 7)
23 | #ggsave(filename = paste(sample_name,"_Cellular_Component.pdf",sep=""),width = 15 ,height = 7)
24 | pdf(paste(sample_name,"_Cellular_Component.pdf",sep=""),width = 15 ,height = 7)
25 | print(p1)
26 | dev.off()
27 | svg(paste(sample_name,"_Cellular_Component.svg",sep=""),width = 15 ,height = 7)
28 | print(p1)
29 | dev.off()
30 |
31 | #MF
32 | nlim=round((head(dataMF$V1,n = 1)+150),digits = -2)
33 | p2 <-ggplot(data=dataMF, aes(x=reorder(dataMF$V2,dataMF$V1), y=dataMF$V1))+
34 | geom_bar(stat="identity", fill="blue", width=.5)+
35 | coord_flip()+labs(x="Classification",y="Number of Sequences")+
36 | geom_text(aes(label=dataMF$V1), position=position_dodge(width=0.7), vjust=-0.0005, hjust=-.15)+
37 | theme(axis.text=element_text(size=10))+ylim(0,nlim)+theme(text = element_text(size = 15))+
38 | theme(axis.text.x=element_text(size=12,angle=0))+theme(axis.title=element_text(size=15,face="bold"))+
39 | ggtitle(paste(sample_name,"Molecular Function GOs",sep=" "))+
40 | theme(plot.title = element_text(family="sans", colour = "black", size = rel(1.1)*1, face = "bold"))
41 |
42 | #ggsave(filename = paste(sample_name,"_Molecular_Function.svg",sep=""),width = 15 ,height = 7)
43 | #ggsave(filename = paste(sample_name,"_Molecular_Function.pdf",sep=""),width = 15 ,height = 7)
44 | pdf(paste(sample_name,"_Molecular_Function.pdf",sep=""),width = 15 ,height = 7)
45 | print(p2)
46 | dev.off()
47 | svg(paste(sample_name,"_Molecular_Function.svg",sep=""),width = 15 ,height = 7)
48 | print(p2)
49 | dev.off()
50 |
51 | #BP
52 | nlim=round((head(dataBP$V1,n = 1)+150),digits = -2)
53 | p3<-ggplot(data=dataBP, aes(x=reorder(dataBP$V2,dataBP$V1), y=dataBP$V1))+
54 | geom_bar(stat="identity", fill="red", width=.5)+
55 | coord_flip()+labs(x="Classification",y="Number of Sequences")+
56 | geom_text(aes(label=dataBP$V1), position=position_dodge(width=0.7), vjust=-0.0005, hjust=-.15)+
57 | theme(axis.text=element_text(size=10))+ylim(0,nlim)+theme(text = element_text(size = 15))+
58 | theme(axis.text.x=element_text(size=12,angle=0))+theme(axis.title=element_text(size=15,face="bold"))+
59 | ggtitle(paste(sample_name,"Biological Processes GOs",sep=" "))+
60 | theme(plot.title = element_text(family="sans", colour = "black", size = rel(1.1)*1, face = "bold"))
61 |
62 | #ggsave(filename = paste(sample_name,"_Biological_Processes.svg",sep=""),width = 15 ,height = 7)
63 | #ggsave(filename = paste(sample_name,"_Biological_Processes.pdf",sep=""),width = 15 ,height = 7)
64 | pdf(paste(sample_name,"_Biological_Processes.pdf",sep=""),width = 15 ,height = 7)
65 | print(p3)
66 | dev.off()
67 | svg(paste(sample_name,"_Biological_Processes.svg",sep=""),width = 15 ,height = 7)
68 | print(p3)
69 | dev.off()
70 |
--------------------------------------------------------------------------------
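An invocation sketch: the script takes only the sample name (hypothetical here) and expects `GO_cellular.txt`, `GO_molecular.txt`, and `GO_biological.txt` in the working directory, producing a PDF and an SVG per GO category:

```bash
# Needs ggplot2 and ggthemes; writes e.g. SampleA_Cellular_Component.pdf
Rscript bin/GO_plots.R SampleA
```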
/bin/SOS_busco.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import argparse
4 | import math
5 | import sys
6 | from collections import Counter
7 | from functools import reduce
8 | import numpy as np
9 | import pandas as pd
10 | from Bio import SeqIO
11 |
12 |
13 | parser = argparse.ArgumentParser(usage='', description='')
14 | parser.add_argument('-input_file_busco', dest='input_file_busco',required=True)
15 | parser.add_argument('-input_file_fasta', dest='input_file_fasta',required=True)
16 | parser.add_argument('-min', dest='min_num_assembler', type=float, required=True)
17 | parser.add_argument('-kmers',dest='kmers',required=True)
18 |
19 | args = parser.parse_args()
20 |
21 | assemblers_names = ['SOAP','SPADES','TransABySS','Velvet']
22 |
23 | all_missing_list = []
24 | list_of_databases = []
25 | final_list = []
26 |
27 | Busco_to_save = []
28 |
29 | with open(args.input_file_busco) as input_busco_file:
30 |
31 | kmers_list = args.kmers.strip().split(',')
32 | nr_of_kmers = (len(kmers_list)*4+2)
33 | column_names = [(assembler + '_' + kmer) for assembler in assemblers_names for kmer in kmers_list]
34 | column_names.insert(3*len(kmers_list) ,'Trinity')
35 | column_names.insert(len(column_names),'TransPi')
36 | column_names.insert(0,'Busco ID')
37 |
38 | busco_df = pd.read_csv(input_busco_file, sep=',',header=0,names=['Busco_id','Status','Sequence','Score','Length'])
39 | busco_unique = busco_df.groupby((busco_df['Busco_id'] !=busco_df['Busco_id'].shift()).cumsum().values).first()
40 |
41 | busco_tables = np.array_split(busco_unique, nr_of_kmers)
42 | transpi_table = busco_tables[nr_of_kmers-1]
43 |
44 | for table in busco_tables:
45 | busco_missing = table[table.Status.eq('Missing')].iloc[:,0].tolist()
46 | all_missing_list.extend(busco_missing)
47 | missing_Busco = list(dict.fromkeys(all_missing_list))
48 |
49 | for table in busco_tables:
50 | final_df = table[table['Busco_id'].isin(missing_Busco)].iloc[:, 0:2]
51 | final_list.append(final_df)
52 |
53 | comparison_table = reduce(lambda left,right: pd.merge(left,right,on='Busco_id'), final_list)
54 | comparison_table.columns = column_names
55 | transpi_table = comparison_table[(comparison_table['TransPi'] == 'Missing')]
56 |
57 | comparison_table.to_csv('Complete_comparison_table',sep='\t',index=False)
58 | transpi_table.to_csv('TransPi_comparison_table',sep='\t',index=False)
59 |
60 | BUSCO_to_rescue = transpi_table[(transpi_table == 'Complete').any(axis=1)].iloc[:,0].tolist()
61 |
62 | if len(BUSCO_to_rescue) == 0:
63 | sys.exit(0)
64 | elif len(BUSCO_to_rescue) != 0:
65 | for table in busco_tables[:-1]:
66 | for i in BUSCO_to_rescue:
67 | seqs = (i,table['Sequence'].loc[table['Busco_id'] == i].values[0],table['Score'].loc[table['Busco_id'] == i].values[0])
68 | Busco_to_save.append(seqs)
69 |
70 | potential_seqs = [t for t in Busco_to_save if not any(isinstance(n, float) and math.isnan(n) for n in t)]
71 | flat_list = [i[0] for i in potential_seqs]
72 | busco_count = Counter(flat_list)
73 |
74 | min_number = nr_of_kmers * args.min_num_assembler
75 | busco_to_save = [k for k, v in busco_count.items() if v >= min_number]
76 |
77 | seqs_to_save = [item for item in potential_seqs if item[0] in busco_to_save]
78 |
79 | seqs_to_save.sort(key= lambda x: x[2], reverse=True)
80 |
81 | checked = set()
82 | unique_seqs_list = []
83 |
84 | for busco_id, sequence, score in seqs_to_save:
85 |     if busco_id not in checked:
86 | checked.add(busco_id)
87 | unique_seqs_list.append((busco_id,sequence))
88 |
89 | #The fasta file is parsed with Biopython SeqIO.parse. And target sequences are extracted.
90 | sequences_IDs_to_rescue = [ x[1] for x in unique_seqs_list]
91 | fasta_to_extract = []
92 |
93 | for seqrecord in SeqIO.parse(args.input_file_fasta, 'fasta'):
94 | if seqrecord.id in sequences_IDs_to_rescue:
95 | fasta_to_extract.append(seqrecord)
96 |
97 | #Output files are written.
98 | with open('sequences_to_add.fasta','w') as outputh:
99 | SeqIO.write(fasta_to_extract,outputh,'fasta')
100 |
--------------------------------------------------------------------------------
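A hedged invocation sketch (input filenames are hypothetical). `-input_file_busco` is the concatenated BUSCO full tables for all assemblies plus TransPi, `-input_file_fasta` the pooled assemblies to rescue sequences from, `-min` the fraction of assemblies that must recover a BUSCO (presumably the same value as `--minPerc`), and `-kmers` the kmer list of the run:

```bash
python3 bin/SOS_busco.py -input_file_busco all_busco_tables.csv \
    -input_file_fasta all_assemblies.fasta -min .70 -kmers 25,41,53
# Writes Complete_comparison_table, TransPi_comparison_table,
# and sequences_to_add.fasta
```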
/README.md:
--------------------------------------------------------------------------------
1 | # TransPi - TRanscriptome ANalysiS PIpeline
2 |
3 | ```text
4 | _______ _____ _
5 | |__ __| | __ \ (_)
6 | | | _ __ __ _ _ __ ___ | |__) | _
7 | | | | __| / _ | | _ \ / __| | ___/ | |
8 | | | | | | (_| | | | | | \__ \ | | | |
9 | |_| |_| \__,_| |_| |_| |___/ |_| |_|
10 | ```
11 |
12 | [](https://doi.org/10.1101/2021.02.18.431773)[**Preprint**](https://doi.org/10.1101/2021.02.18.431773) [](https://gitter.im/PalMuc/TransPi) [](https://docs.conda.io/en/latest/) [](https://www.docker.com/) [](https://sylabs.io/docs/)
13 | [](https://github.com/PalMuc/TransPi/releases/latest)
14 |
15 | # Table of contents
16 |
17 | * [General info](#General-info)
18 | * [Pipeline processes](#Pipeline-processes)
19 | * [Manual](#Manual)
20 | * [Publication](#Publication)
21 | * [Citation](#Citation)
22 | * [Funding](#Funding)
23 | * [Future work](#Future-work)
24 | * [Issues](#Issues)
25 | * [Chat](#Chat)
26 |
27 | # General info
28 |
29 | TransPi – a comprehensive TRanscriptome ANalysiS PIpeline for de novo transcriptome assembly
30 |
31 | TransPi is based on the scientific workflow manager [Nextflow](https://www.nextflow.io). It is designed to help researchers get the best reference transcriptome assembly for their organisms of interest. It performs multiple assemblies with different parameters to then get a non-redundant consensus assembly. It also performs other valuable analyses such as quality assessment of the assembly, BUSCO scores, Transdecoder (ORFs), and gene ontologies (Trinotate), etc. All these with minimum input from the user but without losing the potential of a comprehensive analysis.
32 |
33 | ## Pipeline processes
34 |
35 | 
36 |
37 | **Figure 1.** TransPi v1.0.0 flowchart showing the various steps and analyses it can perform. For simplicity, this diagram does not show all the connections between the processes. Also, it omits other additional options like the BUSCO distribution and transcriptome filtering with psytrans (see Section 2.6). ORFs=Open Reading Frames; HTML=Hypertext Markup Language.
38 |
39 | ## Manual
40 |
41 | TransPi documentation and examples can be found [here](https://palmuc.github.io/TransPi/)
42 |
43 | # Publication
44 |
45 | The TransPi preprint, including kmer, read length, and read quantity tests, can be found [here](https://doi.org/10.1101/2021.02.18.431773). We also tested the pipeline with over 45 samples from different phyla.
46 |
47 | TransPi has been peer-reviewed and recommended by Peer Community In Genomics
48 | (https://doi.org/10.24072/pci.genomics.100009)
49 |
50 | ## Citation
51 |
52 | If you use TransPi please cite the peer-reviewed publication:
53 |
54 | Rivera-Vicéns, R.E., García-Escudero, CA., Conci, N., Eitel, M., and Wörheide, G. (2021). TransPi – a comprehensive TRanscriptome ANalysiS PIpeline for de novo transcriptome assembly. bioRxiv 2021.02.18.431773; doi: https://doi.org/10.1101/2021.02.18.431773
55 |
56 | # Funding
57 |
58 | - European Union’s Horizon 2020 research and innovation programme under the Marie Skłodowska-Curie grant agreement No 764840 (ITN IGNITE).
59 |
60 | - Advanced Human Capital Program of the National Commission for Scientific and Technological Research (CONICYT)
61 |
62 | - Lehre@LMU (project number: W19 F1; Studi forscht@GEO)
63 |
64 | - LMU Munich’s Institutional Strategy LMUexcellent within the framework of the German Excellence Initiative
65 |
66 | # Future work
67 |
68 | - Cloud deployment of the tool
69 |
70 | # Issues
71 |
72 | We tested TransPi using conda, singularity and docker. However, if you find a problem or get an error please let us know by opening an issue.
73 |
74 | ## Chat
75 |
76 | If you have further questions and need help with TransPi you can chat with us in the [TransPi Gitter chat](https://gitter.im/PalMuc/TransPi)
77 |
--------------------------------------------------------------------------------
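A hedged quickstart, condensed from the manual in `docs/` (reads path and kmer list are placeholders):

```bash
git clone https://github.com/palmuc/TransPi.git && cd TransPi
bash precheck_TransPi.sh /YOUR/PATH/HERE/
nextflow run TransPi.nf --all --reads '/YOUR/READS/PATH/HERE/*_R[1,2].fastq.gz' \
    --k 25,41,53 --maxReadLen 75 -profile conda
```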
/bin/busco_comparison.R:
--------------------------------------------------------------------------------
1 | ######################################
2 |
3 | # Edited from the original BUSCO plot script
4 |
5 | ######################################
6 | #
7 | # BUSCO summary figure
8 | # @version 3.0.0
9 | # @since BUSCO 2.0.0
10 | #
11 | # Copyright (c) 2016-2017, Evgeny Zdobnov (ez@ezlab.org)
12 | # Licensed under the MIT license. See LICENSE.md file.
13 | #
14 | ######################################
15 | args = commandArgs(trailingOnly=TRUE)
16 | sample_name=args[1]
17 | options(warn=-1)
18 | # Load the required libraries
19 | library(ggplot2)
20 | library(grid)
21 | # !!! CONFIGURE YOUR PLOT HERE !!!
22 | # Output
23 | #my_output <- paste("./","combined_busco_figure.png",sep="/")
24 | #my_width <- 20
25 | #my_height <- 15
26 | #my_unit <- "cm"
27 | # Colors
28 | #my_colors <- c("#56B4E9", "#3492C7", "#F0E442", "#F04442")
29 | #cata
30 | my_colors <- c("#0e9aa7", "#96ceba", "#ffcc5c", "#ff6f69")
31 | # Bar height ratio
32 | my_bar_height <- 0.55
33 | # Legend
34 | my_title <- "BUSCO Assessment Results - Trinity vs TransPi"
35 | # Font
36 | my_family <- "sans"
37 | my_size_ratio <- 1
38 | # !!! SEE YOUR DATA HERE !!!
39 | # Your data as generated by python, remove or add more
40 | my_species <- c(MYSPEC)
41 | my_species <- factor(my_species)
42 | my_species <- factor(my_species,levels(my_species)[c(length(levels(my_species)):1)]) # reorder your species here just by changing the values in the vector :
43 | my_percentage <- c(MYPERC)
44 | my_values <- c(MYVAL)
45 |
46 | ######################################
47 | ######################################
48 | # Code to produce the graph
49 | labsize = 1
50 | if (length(levels(my_species)) > 10){
51 | labsize = 0.66
52 | }
53 | print("Plotting the figure ...")
54 | category <- c(rep(c("S","D","F","M"),c(1)))
55 | category <-factor(category)
56 | #category = factor(category,levels(category)[c(4,1,2,3)])
57 | category = factor(category,levels(category)[(c(4,1,2,3))])
58 | df = data.frame(my_species,my_percentage,my_values,category)
59 | figure <- ggplot() +
60 | geom_bar(aes(y = my_percentage, x = my_species, fill = category), data = df, stat="identity", width=my_bar_height,position = position_stack(reverse=TRUE)) +
61 | coord_flip() +
62 | theme_gray(base_size = 8) +
63 | scale_y_continuous(labels = c("0","20","40","60","80","100"), breaks = c(0,20,40,60,80,100)) +
64 | scale_fill_manual(values = my_colors,labels =c(" Complete (C) and single-copy (S) ",
65 | " Complete (C) and duplicated (D)",
66 | " Fragmented (F) ",
67 | " Missing (M)")) +
68 | ggtitle(my_title) +
69 | xlab("") +
70 | ylab("\n%BUSCOs") +
71 | theme(plot.title = element_text(family=my_family, colour = "black", size = rel(2.2)*my_size_ratio, face = "bold")) +
72 | theme(legend.position="top",legend.title = element_blank()) +
73 | theme(legend.text = element_text(family=my_family, size = rel(1.2)*my_size_ratio)) +
74 | theme(panel.background = element_rect(color="#FFFFFF", fill="white")) +
75 | theme(panel.grid.minor = element_blank()) +
76 | theme(panel.grid.major = element_blank()) +
77 | theme(axis.text.y = element_text(family=my_family, colour = "black", size = rel(1.66)*my_size_ratio,face="italic")) +
78 | theme(axis.text.x = element_text(family=my_family, colour = "black", size = rel(1.66)*my_size_ratio)) +
79 | theme(axis.line = element_line(size=1*my_size_ratio, colour = "black")) +
80 | theme(axis.ticks.length = unit(.85, "cm")) +
81 | theme(axis.ticks.y = element_line(colour="white", size = 0)) +
82 | theme(axis.ticks.x = element_line(colour="#222222")) +
83 | theme(axis.ticks.length = unit(0.4, "cm")) +
84 | theme(axis.title.x = element_text(family=my_family, size=rel(1.2)*my_size_ratio)) +
85 | guides(fill = guide_legend(override.aes = list(colour = NULL))) +
86 | guides(fill=guide_legend(nrow=2,byrow=TRUE))
87 | for(i in rev(c(1:length(levels(my_species))))){
88 | detailed_values <- my_values[my_species==my_species[my_species==levels(my_species)[i]]]
89 | total_buscos <- sum(detailed_values)
90 | figure <- figure +
91 | annotate("text", label=paste("C:", detailed_values[1] + detailed_values[2], " [S:", detailed_values[1], ", D:", detailed_values[2], "], F:", detailed_values[3], ", M:", detailed_values[4], ", n:", total_buscos, sep="\t"),
92 | y=3, x = i, size = labsize*4*my_size_ratio, colour = "black", hjust=0, family=my_family)
93 | }
94 |
95 | #ggsave not working in docker
96 | #ggsave(filename = paste(sample_name,"_BUSCO_comparison.svg",sep=""),width = 15 ,height = 7)
97 | #ggsave(filename = paste(sample_name,"_BUSCO_comparison.pdf",sep=""),width = 15 ,height = 7)
98 | pdf(paste(sample_name,"_BUSCO_comparison.pdf",sep=""),width = 15 ,height = 7)
99 | print(figure)
100 | dev.off()
101 | svg(paste(sample_name,"_BUSCO_comparison.svg",sep=""),width = 15 ,height = 7)
102 | print(figure)
103 | dev.off()
104 | print("Done")
105 |
--------------------------------------------------------------------------------
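`MYSPEC`, `MYPERC`, and `MYVAL` are placeholders that must be substituted before the script can run. A hypothetical sketch of filling them with the `final_spec`, `final_perc`, and `final_num` files produced by `bin/get_busco_val.sh` (TransPi's actual wiring may differ):

```bash
# Substitute the placeholders, then render the comparison figure
sed -e "s/MYSPEC/$( cat final_spec )/" \
    -e "s/MYPERC/$( cat final_perc )/" \
    -e "s/MYVAL/$( cat final_num )/" \
    bin/busco_comparison.R >SampleA_busco_comparison.R
Rscript SampleA_busco_comparison.R SampleA
```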
/docs/examples.adoc:
--------------------------------------------------------------------------------
1 | Here are some examples on how to deploy TransPi depending on the method to use (e.g. conda) and the analyses to be performed.
2 |
3 | = Profiles
4 | You can use TransPi either with:
5 | - a local conda environment (from precheck);
6 | - individual conda environments per process;
7 | - docker or singularity
8 |
9 | == Conda
10 | This way of executing TransPi assumes that you installed conda locally.
11 | All of this is done automatically for you, if desired, by the precheck script.
12 |
13 | *Example:*
14 | [source,bash]
15 | ----
16 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
17 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results_Acropora \
18 | -profile conda
19 | ----
20 |
21 | [NOTE]
22 | `-profile conda` tells TransPi to use conda. An individual environment is used per process.
23 |
24 | == Containers
25 | Docker or singularity can also be used for deploying TransPi. You can either use individual containers for each process or a TransPi container with all the tools.
26 |
27 | === Individual
28 | To use individual containers:
29 |
30 | *Example for docker:*
31 | [source,bash]
32 | ----
33 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
34 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results_Acropora \
35 | -profile docker
36 | ----
37 |
38 | *Example for singularity:*
39 | [source,bash]
40 | ----
41 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
42 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results_Acropora \
43 | -profile singularity
44 | ----
45 |
46 | [NOTE]
47 | Some individual containers can create problems. We are working on solving these issues. In the meantime you can use the TransPi container (see below).
48 |
49 | === TransPi container
50 | To use the TransPi container with all the tools you need to use the profile `TransPiContainer`.
51 |
52 | *Example for docker:*
53 | [source,bash]
54 | ----
55 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
56 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results_Acropora \
57 | -profile docker,TransPiContainer
58 | ----
59 |
60 | *Example for singularity:*
61 | [source,bash]
62 | ----
63 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
64 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results_Acropora \
65 | -profile singularity,TransPiContainer
66 | ----
67 |
68 |
69 | = Other examples
70 |
71 | [NOTE]
72 | The order of the options is not important.
73 |
74 | == Filtering
75 |
76 | *Scenario:*
77 | [horizontal]
78 | Sample:: Coral sample
79 | Read length:: 150bp
80 | TransPi mode:: --all
81 | Kmers:: 25,35,55,75,85
82 | Reads PATH:: /YOUR/PATH/HERE/*_R[1,2].fastq.gz
83 | Output directory:: Results_Acropora
84 | Work directory:: work_acropora
85 | Engine:: conda
86 | Filter species:: on
87 | Host:: scleractinian proteins
88 | Symbiont:: symbiodinium proteins
89 |
90 | *Command:*
91 | [source,bash]
92 | ----
93 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
94 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results_Acropora \
95 | -w work_acropora -profile conda --filterSpecies \
96 | --host /YOUR/PATH/HERE/uniprot-Scleractinia.fasta \
97 | --symbiont /YOUR/PATH/HERE/uniprot-symbiodinium.fasta
98 | ----
99 |
100 |
101 | == BUSCO distribution
102 |
103 | *Scenario:*
104 | [horizontal]
105 | Sample:: SampleA
106 | Read length:: 100bp
107 | TransPi mode:: --all
108 | Kmers:: 25,41,57,67
109 | Reads PATH:: /YOUR/PATH/HERE/SampleA/*_R[1,2].fastq.gz
110 | Output directory:: Results_SampleA
111 | Engine:: conda
112 | All BUSCOs:: on
113 | BUSCO distribution:: on
114 |
115 | *Command:*
116 | [source,bash]
117 | ----
118 | nextflow run TransPi.nf --all --maxReadLen 100 --k 25,41,57,67 \
119 | --outdir Results_SampleA --reads '/YOUR/PATH/HERE/SampleA/*_R[1,2].fastq.gz' \
120 | -profile conda --allBuscos --buscoDist
121 | ----
122 |
123 | == `--onlyEvi`
124 |
125 | *Scenario:*
126 | [horizontal]
127 | Sample:: Assemblies from multiple assemblers and kmers
128 | Read length:: 50bp
129 | TransPi mode:: --onlyEvi
130 | Kmers:: 25,33,37
131 | Reads PATH:: /YOUR/PATH/HERE/*_R[1,2].fastq.gz
132 | Output directory:: Reduction_results
133 | Engine:: conda
134 |
135 | *Command:*
136 | [source,bash]
137 | ----
138 | nextflow run TransPi.nf --onlyEvi --outdir Reduction_results \
139 | -profile conda
140 | ----
141 |
142 | .NOTES
143 | ****
144 | - A directory named `onlyEvi` containing the transcriptome(s) to reduce is needed for this option (see the preparation sketch after these notes).
145 |
146 | TIP: You can process multiple transcriptomes at the same time. Each file should have a unique name.
147 |
148 | - There is no need to specify the reads PATH, read length, cutoff, or kmers when using `--onlyEvi`.
149 |
150 | ****
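 | 
 | A minimal preparation sketch (the assembly file names are hypothetical):
 | 
 | [source,bash]
 | ----
 | # place the transcriptomes to reduce in a directory named onlyEvi
 | mkdir -p onlyEvi
 | cp /YOUR/PATH/HERE/SampleA_assembly1.fasta onlyEvi/
 | cp /YOUR/PATH/HERE/SampleA_assembly2.fasta onlyEvi/
 | # then run the reduction
 | nextflow run TransPi.nf --onlyEvi --outdir Reduction_results -profile conda
 | ----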
151 |
152 | == `--onlyAnn`
153 |
154 | *Scenario:*
155 | [horizontal]
156 | Sample:: Transcriptome missing annotation
157 | Read length:: 100bp
158 | TransPi mode:: --onlyAnn
159 | Kmers:: 25,41,57,67
160 | Reads PATH:: /YOUR/PATH/HERE/*_R[1,2].fastq.gz
161 | Output directory:: Annotation_results
162 | Engine:: singularity
163 | Container:: TransPi container
164 |
165 | *Command:*
166 | [source,bash]
167 | ----
168 | nextflow run TransPi.nf --onlyAnn --outdir Annotation_results \
169 | -profile singularity,TransPiContainer
170 | ----
171 |
172 | .NOTES
173 | ****
174 | - A directory named `onlyAnn` containing the transcriptome(s) to annotate is needed for this option (see the preparation sketch after these notes).
175 |
176 | TIP: You can process multiple transcriptomes (i.e. samples) at the same time. Each file should have a unique name.
177 |
178 | - There is no need to specify the reads PATH, read length, cutoff, or kmers when using `--onlyAnn`.
179 |
180 | ****
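 | 
 | A minimal preparation sketch (the transcriptome file name is hypothetical):
 | 
 | [source,bash]
 | ----
 | # place the transcriptome(s) to annotate in a directory named onlyAnn
 | mkdir -p onlyAnn
 | cp /YOUR/PATH/HERE/SampleA_transcriptome.fasta onlyAnn/
 | # then run the annotation
 | nextflow run TransPi.nf --onlyAnn --outdir Annotation_results \
 |     -profile singularity,TransPiContainer
 | ----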
181 |
182 | == Skip options
183 |
184 | *Scenario:*
185 | [horizontal]
186 | Sample:: Coral sample
187 | Read length:: 150bp
188 | TransPi mode:: --all
189 | Kmers:: 25,35,55,75,85
190 | Reads PATH:: /YOUR/PATH/HERE/*_R[1,2].fastq.gz
191 | Output directory:: Results_Acropora
192 | Work directory:: work_acropora
193 | Engine:: docker
194 | Container:: Individual containers
195 | Skip QC:: on
196 | Skip Filter:: on
197 |
198 | *Command:*
199 | [source,bash]
200 | ----
201 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
202 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results_Acropora \
203 | -w work_acropora -profile docker \
204 | --skipQC --skipFilter
205 | ----
206 |
207 | == Extra annotation steps
208 |
209 | *Scenario:*
210 | [horizontal]
211 | Sample:: Mollusc sample
212 | Read length:: 150bp
213 | TransPi mode:: --all
214 | Kmers:: 25,35,55,75,85
215 | Reads PATH:: /YOUR/PATH/HERE/*_R[1,2].fastq.gz
216 | Output directory:: Results
217 | Engine:: conda
218 | Skip QC:: on
219 | SignalP:: on
220 | TMHMM:: on
221 | RNAmmer:: on
222 |
223 |
224 | *Command:*
225 | [source,bash]
226 | ----
227 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
228 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results \
229 | -profile conda --skipQC --withSignalP --withTMHMM --withRnammer
230 | ----
231 |
232 | .NOTE
233 | ****
234 | - This option requires manual installation of the CBS-DTU tools: SignalP, TMHMM, and RNAmmer.
235 |
236 | - For more info visit https://services.healthtech.dtu.dk/software.php[CBS-DTU tools]
237 |
238 | - It also assumes that the `PATH` of each tool is set in the `nextflow.config` file (the `rnam`, `tmhmm`, and `signalp` parameters).
239 |
240 | ****
241 |
242 |
243 | == Full run and extra annotation
244 |
245 | *Scenario:*
246 | [horizontal]
247 | Sample:: Coral sample
248 | Read length:: 150bp
249 | TransPi mode:: --all
250 | Kmers:: 25,35,55,75,85
251 | Reads PATH:: /YOUR/PATH/HERE/*_R[1,2].fastq.gz
252 | Output directory:: Results
253 | Engine:: conda
254 | Skip QC:: on
255 | SignalP:: on
256 | TMHMM:: on
257 | RNAmmer:: on
258 | Filter species:: on
259 | Host:: scleractinian proteins
260 | Symbiont:: symbiodinium proteins
261 | All BUSCOs:: on
262 | BUSCO distribution:: on
263 | Remove rRNA:: on
264 | rRNA database:: /YOUR/PATH/HERE/silva_rRNA_file.fasta
265 |
266 | *Command:*
267 | [source,bash]
268 | ----
269 | nextflow run TransPi.nf --all --maxReadLen 150 --k 25,35,55,75,85 \
270 | --reads '/YOUR/PATH/HERE/*_R[1,2].fastq.gz' --outdir Results \
271 | -profile conda --skipQC --withSignalP --withTMHMM --withRnammer \
272 | --host /YOUR/PATH/HERE/uniprot-Scleractinia.fasta \
273 | --symbiont /YOUR/PATH/HERE/uniprot-symbiodinium.fasta \
274 | --allBuscos --buscoDist --rRNAfilter \
275 | --rRNAdb "/YOUR/PATH/HERE/silva_rRNA_file.fasta"
276 | ----
277 |
--------------------------------------------------------------------------------
/template.nextflow.config:
--------------------------------------------------------------------------------
1 | /*
2 | ================================================================================================
3 | Config File TransPi
4 | ================================================================================================
5 | Transcriptome Analysis Pipeline
6 | Author: Ramón E. Rivera-Vicéns
7 | GitHub: rivera10
8 | ----------------------------------------------------------------------------------------
9 | */
10 |
11 | params {
12 |
13 | // ------------------------- EDIT below variables (mandatory) ------------------------- //
14 | // --------------------- Can also be specified in the command line ---------------------- //
15 |
16 | // Modify this accordingly (if needed)
17 | // kmers list (depends on read length!)
18 | k=""
19 |
20 | // SOAP config file generator
21 | //#maximal read length
22 | maxReadLen=""
23 | //[LIB]
24 | //#maximal read length in this lib
25 | rd_len_cutof="${params.maxReadLen}"
26 |
27 | // Other options if needed. Leave defaults if unsure.
28 | //#average insert size
29 | //avg_ins="200"
30 | //#if sequence needs to be reversed
31 | reverse_seq="0"
32 | //#in which part(s) the reads are used
33 | asm_flags="3"
34 | //#minimum aligned length to contigs for a reliable read location (at least 32 for short insert size)
35 | map_len="32"
36 |
37 | // -------------------------- EDIT below variables if needed -------------------------- //
38 |
39 | // Directory for results
40 | outdir="results"
41 |
42 | // Directory for trace files
43 | tracedir="pipeline_info"
44 |
45 | // PATH for rnammer, tmhmm, signalp programs. Requires licenses. See CBS-DTU tools for information.
46 | // RNAmmer
47 | rnam = ""
48 | // Tmhmm
49 | tmhmm = ""
50 | // SignalP
51 | signalp = ""
52 |
53 | /*
54 | // ------------------------------------------------ STOP ------------------------------------------------ //
55 |
56 | Most of the values below are filled in by the precheck script (e.g. PATHs to databases or the conda installation).
57 | However, if you run the precheck for a container, you will not have all these PATHs assigned (e.g. the conda PATH).
58 | If that is the case, run the precheck again and select conda instead of containers.
59 |
60 |
61 | For other options (e.g. filtering, buscoDist, etc.), it is recommended to set them on the command line.
62 |
63 |
64 | Proceed to the end of this config file to adjust the CPUs and RAM of the processes to the specs of your system.
65 | Also modify the profiles there if you use a scheduler such as SLURM or PBS.
66 |
67 |
68 | More info at the TransPi repository (https://github.com/PalMuc/TransPi) and
69 | manual (https://palmuc.github.io/TransPi/).
70 |
71 |
72 | // -------------------------------------------------------------------------------------------------------------- //
73 | */
74 |
75 | // PATH to TransPi DBs installation
76 | pipeInstall
77 |
78 | // Uniprot database PATH
79 | uniprot
80 | uniname
81 |
82 | //BUSCO database
83 | busco4db
84 |
85 | //PFAM file location
86 | pfloc
87 |
88 | //name of pfam file
89 | pfname
90 |
91 | //Trinotate sqlite created when installing Trinotate
92 | Tsql
93 |
94 | // Directory for reads
95 | reads=""
96 |
97 | // Pipeline options
98 | help = false
99 | fullHelp = false
100 |
101 | // Full analysis
102 | all = false
103 |
104 | // Only Evidential Gene run (one sample per run)
105 | onlyEvi = false
106 |
107 | // Only annotation analysis
108 | onlyAnn = false
109 |
110 | // Only Assemblies and Evidential Gene
111 | onlyAsm = false
112 |
113 | // Skip quality control
114 | skipQC = false
115 |
116 | // Skip fastp quality filter step
117 | skipFilter = false
118 | // Minimum reads quality for filtering in fastp
119 | minQual="5"
120 |
121 | // Filter rRNA
122 | rRNAfilter = false
123 | // rRNA database
124 | rRNAdb = ""
125 |
126 | // Skip normalization of reads
127 | skipNormalization = false
128 | // Normalization parameters
129 | normMaxCov=100
130 | normMinCov=1
131 |
132 | // Save reads from filtering and normalization
133 | saveReads = false
134 |
135 | // Save bam file from mapping step
136 | saveBam = false
137 |
138 | // Filter Species using psytrans
139 | filterSpecies = false
140 | // Psytrans value to train model
141 | psyval=160
142 | // Host Sequence
143 | host=""
144 | // Symbiont Sequence
145 | symbiont=""
146 |
147 | // Run BUSCO in all assemblies
148 | allBuscos = false
149 |
150 | // BUSCO distribution analysis (this option needs to be run together with the allBuscos option)
151 | // Generate the analysis
152 | buscoDist = false
153 | // Minimum percentage of assemblers required to rescue a BUSCO sequence
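 | // e.g. with minPerc=0.7, a BUSCO is rescued if it is present in at least 70% of the assemblies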
154 | minPerc="0.7"
155 |
156 | //short Transdecoder run, no homology search (PFAM and UniProt)
157 | shortTransdecoder = false
158 | //Transdecoder genetic code
159 | genCode="Universal"
160 |
161 | // Annotation options
162 | // SignalP
163 | withSignalP = false
164 | // tmHMM
165 | withTMHMM = false
166 | // rnammer
167 | withRnammer = false
168 | // Add annotation to file
169 | addAnnotation = false
170 |
171 | //Test data
172 | readsTest = false
173 |
174 | // Skip Evidential Gene for onlyAsm option
175 | skipEvi = false
176 |
177 | // Kegg pathway search
178 | withKegg = false
179 |
180 | // Skip Report
181 | skipReport = false
182 |
183 | // These options will change how the profiles work.
184 | // Run with conda installed by the precheck
185 | //next 2 parameters are outdated
186 | myConda = false
187 | myCondaInstall=""
188 |
189 | condaActivate = false
190 |
191 | // TransPi container with all programs
192 | oneContainer = false
193 |
194 | // Cache directory for conda and singularity files. Leave blank if unsure.
195 | envCacheDir = ""
196 |
197 | // Singularity
198 | // When true, use a singularity image created by pulling from docker instead of the ready-to-use image from the Galaxy depot.
199 | singularity_pull_docker_container = false
200 |
201 | // Get software versions - only works with local conda installation and TransPi container.
202 | skipGetRunInfo = false
203 | }
204 |
205 | /*
206 | // ------------------------------------------------ NOTE ------------------------------------------------ //
207 |
208 |
209 | Proceed to adjust the CPUs and RAM of the processes below to the specs of your system.
210 | Also modify the profiles if you use a scheduler such as SLURM or PBS.
211 |
212 |
213 | More info at the TransPi repository (https://github.com/PalMuc/TransPi) and
214 | manual (https://palmuc.github.io/TransPi/).
215 |
216 | Also see Nextflow documentation (https://www.nextflow.io/docs/latest/index.html).
217 |
218 |
219 | // -------------------------------------------------------------------------------------------------------------- //
220 | */
221 |
222 | process {
223 | cpus='1'
224 | memory='5 GB'
225 | withLabel: big_cpus {
226 | cpus='20'
227 | memory='15 GB'
228 | }
229 | withLabel: med_cpus {
230 | cpus='8'
231 | memory='15 GB'
232 | }
233 | withLabel: low_cpus {
234 | cpus='4'
235 | memory='15 GB'
236 | }
237 | withLabel: exlow_cpus {
238 | cpus='1'
239 | memory='2 GB'
240 | }
241 | withLabel: big_mem {
242 | cpus='20'
243 | memory='350 GB'
244 | }
245 | withLabel: med_mem {
246 | cpus='15'
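 | // dynamic memory: 150 GB on the first attempt, plus 50 GB for each retry
 | // exit codes 137-140 usually mean the task was killed (e.g. out of memory), so it is retried with more memory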
247 | memory={ 100.GB + (task.attempt * 50.GB) }
248 | errorStrategy={ task.exitStatus in 137..140 ? 'retry' : 'finish' }
249 | maxRetries = 2
250 | }
251 | withLabel: low_mem {
252 | cpus='20'
253 | memory='80 GB'
254 | }
255 | errorStrategy='finish'
256 | }
257 |
258 | // env Evidential Gene variable (only for nextflow)
259 | env.evi="${projectDir}/scripts/evigene"
260 |
261 | // Get PATH for cache environments
262 | params.localCacheDir = (params.envCacheDir ? "${params.envCacheDir}" : "${launchDir}")
263 |
264 | profiles {
265 | conda {
266 | params.condaActivate = true
267 | params.localConda="${params.myCondaInstall}"
268 | // cache for condaEnv created individually
269 | conda.cacheDir = "${params.localCacheDir}/condaEnv/"
270 | }
271 | docker {
272 | docker.enabled = true
273 | docker.runOptions = "-u \$(id -u):\$(id -g) -v ${params.pipeInstall}:${params.pipeInstall}"
274 | // --mount type=bind,src=${params.pipeInstall},dst=/dockerDB"
275 | }
276 | singularity {
277 | singularity.enabled = true
278 | singularity.autoMounts = true
279 | // cache for images from docker pull
280 | singularity.cacheDir="${params.localCacheDir}/singularityCache/"
281 | }
282 | test {
283 | includeConfig 'conf/test.config'
284 | }
285 | TransPiContainer {
286 | process {
287 | params.oneContainer = true
288 | params.TPcontainer="rerv/transpi:v1.0.0"
289 | }
290 | }
291 | palmuc {
292 | process {
293 | executor='slurm'
294 | clusterOptions='-p lemmium --qos=low'
295 | }
296 | }
297 | }
298 |
299 | executor {
300 | $slurm {
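 | // maximum number of tasks handled in parallel (i.e. jobs queued to SLURM at once)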
301 | queueSize=100
302 | }
303 | }
304 |
305 | timeline {
306 | enabled = true
307 | file = "${params.outdir}/${params.tracedir}/transpi_timeline.html"
308 | }
309 | report {
310 | enabled = true
311 | file = "${params.outdir}/${params.tracedir}/transpi_report.html"
312 | }
313 | trace {
314 | enabled = true
315 | file = "${params.outdir}/${params.tracedir}/transpi_trace.txt"
316 | }
317 | dag {
318 | enabled = true
319 | file = "${params.outdir}/${params.tracedir}/transpi_dag.html"
320 | }
321 |
322 | manifest {
323 | name = 'TransPi'
324 | author = 'Ramón E. Rivera-Vicéns'
325 | description = 'Transcriptome Analysis Pipeline'
326 | mainScript = 'TransPi.nf'
327 | nextflowVersion = '>=21.04.1'
328 | version = '1.3.0-rc'
329 | }
330 |
--------------------------------------------------------------------------------
/bin/TransPi_Report_Ind.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "TransPi Report: `r commandArgs(trailingOnly=T)[1]`"
3 | output:
4 | html_document: default
5 | pdf_document: default
6 | date: "Generated on: `r Sys.time()`"
7 | params:
8 | interactive: yes
9 | sample_id: !r commandArgs(trailingOnly=T)[1]
10 | skipFilter: !r commandArgs(trailingOnly=T)[2]
11 | skipNormalization: !r commandArgs(trailingOnly=T)[3]
12 | rRNAfilter: !r commandArgs(trailingOnly=T)[4]
13 | buscoDist: !r commandArgs(trailingOnly=T)[5]
14 | allBuscos: !r commandArgs(trailingOnly=T)[6]
15 | withKegg: !r commandArgs(trailingOnly=T)[7]
16 | ---
17 |
18 |
25 |
26 | ```{r setup, include=FALSE}
27 | knitr::opts_chunk$set(echo = TRUE,
28 | message = FALSE,
29 | warning = FALSE,
30 | out.width="105%"
31 | )
32 | ```
33 |
34 | ```{r load_libraries, include=FALSE}
35 | library(ggthemes)
36 | library(ggplot2)
37 | library(reshape2)
38 | library(grid)
39 | library(plotly)
40 | library(knitr)
41 | library(kableExtra) #install.packages("kableExtra")
42 | library(rmarkdown)
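 | # color palette used for the report figures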
43 | mycol=c('#088da5','#73cdc8','#ff6f61','#7cb8df','#88b04b','#00a199','#6B5B95','#92A8D1','#b0e0e6','#ff7f50','#088d9b','#E15D44','#e19336')
44 | ```
45 |
46 |
47 |
48 |