├── .gitmodules ├── pipeline ├── runBismark.sh ├── runProjectPermission.sh ├── runHTSeqStats.sh ├── runBuscoPlant.sh ├── runFastQCCounts.sh ├── runMeme.sh ├── runRmarkdown.sh ├── sortMeRnaV2Stats.sh ├── runGzip.sh ├── runXxhashsum.sh ├── runGetPasaFasta.sh ├── runSamtools_miRNAclusters.sh ├── submit_nextflow_RNAseq.sh ├── runCmd.sh ├── runMinion.sh ├── runQCprot.sh ├── run_ppfinder.sh ├── run_makeblastdb_pglauca.sh ├── runVCFPlot.sh ├── run_makeblastdb_ptaeda.sh ├── run_array_repeatmasker2.sh ├── run_array_repeatmasker.sh ├── runDexSeqCount.sh ├── run_HMMbuild.sh ├── runKnitR.sh ├── runParseMacs2.sh ├── run_ClustalW.sh ├── run_check_offtarget_for_guide_RNAs.sh ├── run_array_repeatmasker2coredump.sh ├── runCreateDEXSeqReference.sh ├── runRemoveBlankLines.sh ├── runSTARGenomeLoad.sh ├── runSTARGenomeRemove.sh ├── runAbyssBloom.sh ├── runPasaSeqclean.sh ├── runMetaxa2.sh ├── runReaper.sh ├── runMuscle.sh ├── runPASA_GFF3_validator.sh ├── runSamtoolsFlagstat.sh ├── runSampleN_SE.sh ├── runPrepInfomap.sh ├── runTrim.sh ├── join_targets.sh ├── runBedToolsGCov.sh ├── runTremulaTranslate.sh ├── runThresholdNetwork.sh ├── runDiamondMakedb.sh ├── runAsArray.sh ├── runSamtoolsFaidx.sh ├── runDISCOVARdeNovo.sh ├── runSampleN.sh ├── runSamtoolsIdxstats.sh ├── runPicardSamToFastq.sh ├── runFRC.sh ├── runBwaIndex.sh ├── runSortmernaDennis.sh ├── runSeidrThreshold.sh ├── runGATK_CombineGVCFs.sh ├── runPearsonSpearmanCorrelation.sh ├── runInfomap.sh ├── runCLR.sh ├── runSRnaWorkBenchFilter.sh ├── runGenomeTools.sh ├── runBedToolsBamToFastq.sh ├── runBESST.sh ├── runMiRBase_SS.sh ├── runGMAPIndex.sh ├── runFastQC.sh ├── runBamSubset.sh ├── runPasaLoadAnnotation.sh ├── runGeneNetworkRPreparation.sh ├── runCPC2.sh ├── runRepeatMasker.sh ├── runMultiQC.sh ├── runAnova.sh ├── runSSPACE-LR.sh ├── runStarFusion.sh ├── runCuffcompare.sh ├── runFusionInspector.sh ├── runTrinityTransDecoder.sh ├── runBAMtoCRAM.sh ├── runBedToolsIntersect.sh ├── runSamtoolsMerge.sh ├── runVsearchMergePairs.sh 
├── runFastQCMultiviewer.sh ├── runSraFastqDump.sh ├── runMinimap2.sh ├── runFastQValidator.sh ├── runMmseq.sh ├── runRepeatModeler.sh ├── runCNCI.sh ├── runPicardCreateSequenceDictionary.sh ├── runMarkDuplicates.sh ├── runSUPPA2PsiPerEvent.sh ├── runSUPPA2PsiPerIsoform.sh ├── runSalmonStats.sh ├── runGeneNetworkRAggregate.sh ├── runGeneNetworkRThreshold.sh ├── runPyfasta.sh ├── runSeidrBackbone.sh ├── runBedToolsSubtract.sh ├── runITSx.sh ├── runTaxonomicClassification.sh ├── runKmergenie.sh ├── runKallistoStats.sh ├── runSamtoolsIndex.sh ├── runRePair.sh ├── runJellyfishHisto.sh ├── runBgzipTabix.sh ├── runPLEK.sh ├── runGENIE3.sh ├── runTaxonomyUpdate.sh ├── runNutil.sh ├── runJellyfishBc.sh ├── runUsearch.sh ├── runCuffmerge.sh ├── runGATK_SplitNCigarReads.sh ├── runGROM.sh ├── runCleanTrinity.sh ├── runGatkRealignerTargetCreator.sh ├── runKallistoIndex.sh ├── runPicardAddOrReplaceReadGroups.sh ├── runTIGLM.sh ├── runAssemblathonStat.sh ├── runBlastFormatDb.sh ├── runSeidrAggregate.sh ├── runGATK_GenotypeGVCFs.sh ├── runNarromi.sh ├── runGATK_CombineVariants.sh ├── runPlaac.sh ├── runSnpEff.sh ├── runTrimmomaticSeStats.sh ├── runBedToolsCoverage.sh ├── runGeneNetworkRRun.sh ├── runGATK_VariantFiltration.sh ├── run_psf.sh ├── runTrimmomaticStats.sh ├── runGatkFastaAlternateReferenceMaker.sh ├── runJBrowse2.sh ├── runBamtoFastQ.sh ├── runSTARStats.sh ├── runDemultiplex.sh ├── runUpdateNCBI.sh ├── runSeidrRoc.sh ├── runSamtoolsSort.sh ├── runShortstack.sh ├── runPicardMarkDuplicatesWithMateCigar.sh ├── runSamtools_split_primary.sh ├── runGATK_IndelRealigner.sh └── runSwestoreSync.sh ├── VERSION.info ├── src ├── R │ ├── rmd.R │ ├── percentile.R │ ├── GeneNet.R │ ├── try opt.R │ ├── getCoverage.R │ ├── ARACNE.R │ ├── parseUniRef90IDs.R │ ├── reverseFastq.R │ ├── misoPePlot.R │ ├── convertTemplates.R │ ├── mailR.R │ ├── WgcnaClusterPlot.R │ ├── plotSft.R │ ├── GC_percent_from_fasta.R │ ├── extractGff3Subset.R │ ├── rfam5SKrakenPrep.R │ ├── gopher2-example.R │ ├── 
deviseSequenceFromGFF.R │ ├── createGeneAnnotation.R │ ├── enaCsvEdit.R │ ├── updateAspenVcfv1.0.R │ └── plotVCFQual.R ├── bash │ ├── try-catch.sh │ ├── updateTaxonomySqlite.sh │ ├── functions.sh │ └── seidr-aggregate-kebnekaise.sh └── python │ └── fastQCmultiviewer.py ├── templates ├── R │ ├── bulogo2.png │ ├── style.css │ ├── empty.R │ ├── seidrPageRank.R │ ├── footer.html │ └── header.html └── bash │ ├── submitSeidrBackbone.sh │ ├── submitSeidrAggregate.sh │ ├── submitSeidrRoc.sh │ ├── runTemplate.sh │ └── template.sh ├── container ├── apptainer │ ├── macs3.def │ ├── tagdust.def │ ├── plink20.def │ ├── kallisto.def │ ├── plink19.def │ ├── swarm.def │ ├── MCScanX.def │ ├── casoffinder.def │ ├── seidr.def │ ├── angsd.def │ └── velocyto.def └── docker │ └── Dockerfile_diamond ├── nextflow ├── config │ └── upscb.config └── template │ ├── rnaseq_spruce_v2.json │ ├── rnaseq_tomato_v4.json │ ├── rnaseq_arabidopsis_araport11.json │ ├── rnaseq_microtom_xue.json │ ├── rnaseq_T89_v1.json │ ├── rnaseq_microtom_shirasawa.json │ ├── rnaseq_lupin_v2.json │ └── rnaseq_lupin_v1.json └── .gitignore /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pipeline/runBismark.sh: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VERSION.info: -------------------------------------------------------------------------------- 1 | UPSCb-common: 1.2.1 2 | -------------------------------------------------------------------------------- /src/R/rmd.R: -------------------------------------------------------------------------------- 1 | rmd <- function(x){ 2 | mean(abs(x-mean(x)))/mean(x) 3 | } 4 | -------------------------------------------------------------------------------- /templates/R/bulogo2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UPSCb/UPSCb-common/HEAD/templates/R/bulogo2.png -------------------------------------------------------------------------------- /src/R/percentile.R: -------------------------------------------------------------------------------- 1 | "percentile" <- function(x,probs=seq(0,1,.01),...){ 2 | quantile(x,probs=probs) 3 | } -------------------------------------------------------------------------------- /pipeline/runProjectPermission.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo find . -type d -exec chmod 771 "{}" \; 4 | sudo find . -type d -exec chmod g+s "{}" \; 5 | sudo find . -type f -exec chmod 664 "{}" \; 6 | 7 | -------------------------------------------------------------------------------- /pipeline/runHTSeqStats.sh: -------------------------------------------------------------------------------- 1 | find . -name "*.txt" | xargs -I {} bash -c 'echo $0 $(grep __no_feature $0) $(grep __alignment_not_unique $0)' {} 2 | find . 
-name "*.txt" -exec awk 'BEGIN{sum=0}{sum+=$2}END{print sum}' "{}" \; 3 | 4 | -------------------------------------------------------------------------------- /pipeline/runBuscoPlant.sh: -------------------------------------------------------------------------------- 1 | module load bioinfo-tools 2 | module load busco 3 | 4 | # $0 [option] 5 | # option should be -m OGS or -m trans 6 | # only runs on plants 7 | MODE="trans" 8 | cd $out 9 | python3 $BUSCO_PATH/BUSCO_plants.py -o $1 -in $2 -l $BUSCO_DATA/plantae -m $MODE -------------------------------------------------------------------------------- /templates/R/style.css: -------------------------------------------------------------------------------- 1 | body .main-container { 2 | max-width: 98% !important; 3 | margin-left: 0px; 4 | margin-right: 20px; 5 | } 6 | 7 | img { 8 | display:block; 9 | float:none; 10 | margin-left:auto; 11 | margin-right:auto; 12 | } 13 | 14 | .btn-group { display: none; } -------------------------------------------------------------------------------- /pipeline/runFastQCCounts.sh: -------------------------------------------------------------------------------- 1 | find $1 -name "fastqc_data.txt" | xargs -I {} bash -c 'echo $0 $(grep "Total Sequences" $0)' {} 2 | 3 | #find . 
-name "fastqc_data.txt" | xargs -I {} bash -c 'echo $0 $(grep "Total Sequences" $0)' {} | awk '{id=$1;gsub("\\./|_[1,2]_fastqc.*","",id);print id, $4}' | sort | uniq -------------------------------------------------------------------------------- /pipeline/runMeme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --mail-type=all 3 | #SBATCH -p all 4 | #SBATCH -n 8 5 | #SBATCH -t 2-00:00:00 6 | 7 | # module load bioinfo-tools Reaper 8 | set -ex 9 | 10 | meme $1.$SLURM_ARRAY_TASK_ID -dna -oc $2$SLURM_ARRAY_TASK_ID -mod anr -evt 0.05 -maxsize 3500000 -maxw 30 -nmotifs 100 -bfile $3 -p 8 11 | -------------------------------------------------------------------------------- /pipeline/runRmarkdown.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --mail-type=all 3 | 4 | usage () { 5 | echo "Usage:" 6 | echo "runRmarkdown.sh " 7 | echo 8 | } 9 | 10 | if [ ! $# == 1 -o ! 
-f $1 ]; then 11 | usage 12 | exit 1 13 | fi 14 | 15 | module load R 16 | Rscript -e "library(rmarkdown); render(commandArgs(TRUE))" $1 17 | -------------------------------------------------------------------------------- /src/R/GeneNet.R: -------------------------------------------------------------------------------- 1 | args <- commandArgs(trailingOnly = TRUE) 2 | setwd(args[1]) 3 | library(GeneNet) 4 | library(data.table) 5 | dat <- fread(args[2]) 6 | out <- args[3] 7 | 8 | pcor.dyn = ggm.estimate.pcor(as.matrix(dat), method = "dynamic") 9 | write.table(abs(pcor.dyn), out, quote = FALSE, 10 | col.names = FALSE, row.names = FALSE, sep = "\t") 11 | -------------------------------------------------------------------------------- /pipeline/sortMeRnaV2Stats.sh: -------------------------------------------------------------------------------- 1 | grep "%" *.log | grep passing | awk -F_ '{print $4"_"$5,$8}' | awk '{print $1,$10}' | sed 's:[(,)]::g' 2 | grep "%" *.log | grep -v passing | grep -v failing | awk -F_ '{print $4"_"$5,$9}' | awk '{print $1,$3}' > file.txt 3 | R dat <- read.delim(sep=" ",file="file.txt",stringsAsFactors = FALSE,header=FALSE) 4 | do.call(rbind,split(dat[,2],dat[,1])) 5 | -------------------------------------------------------------------------------- /pipeline/runGzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p main -n 1 3 | #SBATCH -t 06:00:00 4 | #SBATCH --mail-type=ALL 5 | 6 | ## stop on error 7 | set -e 8 | 9 | if [ $# == 0 ]; then 10 | echo "This function takes one file as argument" 11 | exit 1 12 | fi 13 | 14 | if [ ! 
-f $1 ]; then 15 | echo "The provided file: $1 does not exist" 16 | exit 1 17 | fi 18 | 19 | gzip -f $1 20 | 21 | -------------------------------------------------------------------------------- /pipeline/runXxhashsum.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p main -n 1 3 | #SBATCH -t 06:00:00 4 | #SBATCH --mail-type=ALL 5 | 6 | ## stop on error 7 | set -e 8 | 9 | if [ $# == 0 ]; then 10 | echo "This function takes one file as argument" 11 | exit 1 12 | fi 13 | 14 | if [ ! -f "$1" ]; then 15 | echo "The provided file: $1 does not exist" 16 | exit 1 17 | fi 18 | 19 | ~/bin/xxhashsum -f "$1" 20 | -------------------------------------------------------------------------------- /pipeline/runGetPasaFasta.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p main 3 | #SBATCH -n 1 4 | #SBATCH -t 1:00:00 5 | #SBATCH --mail-type=ALL 6 | 7 | #### 8 | # A runner part of the novel gene and long non coding RNA pipeline 9 | # python get_fasta_seq.py 10 | #### 11 | 12 | ## stop on error but be verbose 13 | set -e 14 | set -x 15 | 16 | python $UPSCb/src/python/novel_genes/get_fasta_seq.py $1 $2 $3 -------------------------------------------------------------------------------- /pipeline/runSamtools_miRNAclusters.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --mail-type=all 3 | #SBATCH -p main 4 | #SBATCH -n 1 5 | 6 | module load bioinfo-tools 7 | module load samtools/1.3.1 8 | 9 | file=$1 10 | bed=$2 11 | outdir=$3 12 | name=$4 13 | 14 | # extract alignments in miRNA loci regions 15 | # extract columns with sequence names and genomic location 16 | samtools view -L $bed $file | cut -f 1,3,4 > $outdir/$name.miRNA.txt 17 | -------------------------------------------------------------------------------- /templates/R/empty.R: 
-------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "CHANGEME" 3 | #' author: "CHANGEME" 4 | #' date: "`r Sys.Date()`" 5 | #' output: 6 | #' html_document: 7 | #' toc: true 8 | #' number_sections: true 9 | #' code_folding: hide 10 | #' --- 11 | #' # Setup 12 | #' * Libraries 13 | suppressPackageStartupMessages({ 14 | 15 | }) 16 | 17 | #' # Session Info 18 | #' ```{r session info, echo=FALSE} 19 | #' sessionInfo() 20 | #' ``` 21 | -------------------------------------------------------------------------------- /src/R/try opt.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(optparse)) 2 | 3 | Main <- function(){ 4 | ### ================ main 5 | ## define the arguments 6 | option_list <- list( 7 | make_option(c("-op", "--output_prefix"),dest="op", type="character", default="", 8 | help="The output prefix, if wanted")) 9 | opt <- parse_args(OptionParser(option_list=option_list)) 10 | 11 | return(opt$op) 12 | } 13 | Main() 14 | -------------------------------------------------------------------------------- /pipeline/submit_nextflow_RNAseq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p nextflow 3 | #SBATCH -t 72:00:00 4 | #SBATCH -A SLURM_project_Code 5 | #SBATCH -o log_rnaseq.out 6 | #SBATCH -e log_rnaseq.err 7 | 8 | set -eu -o pipefail 9 | 10 | nextflow run nf-core/rnaseq -r 3.19.0 \ 11 | -profile singularity,upscb -c "nextflow/upscb.config" \ 12 | -params-file "nextflow/nf-params.json" \ 13 | -with-trace -with-report "report_rnaseq.html" \ 14 | -resume 15 | -------------------------------------------------------------------------------- /container/apptainer/macs3.def: -------------------------------------------------------------------------------- 1 | BootStrap: docker 2 | From: ubuntu:22.04 3 | 4 | %environment 5 | export LC_ALL=C 6 | 7 | %post 8 | apt-get update 
&& \ 9 | apt-get -y install \ 10 | build-essential \ 11 | python3-dev \ 12 | python3-pip \ 13 | python3 14 | apt-get clean 15 | 16 | cd ~ 17 | pip install numpy scipy scikit-learn hmmlearn Cython cykhash 18 | pip install macs3 19 | pip cache purge 20 | 21 | %runscript 22 | macs3 "$@" 23 | -------------------------------------------------------------------------------- /src/R/getCoverage.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(require(GenomicRanges)) 2 | 3 | args <- commandArgs(trailingOnly=TRUE); 4 | 5 | inf <- args[1] 6 | chr <- args[2] 7 | st <- as.integer(args[3]) 8 | en <- as.integer(args[4]) 9 | 10 | load(inf) 11 | 12 | gr <- grep(".GR$",ls(),value=TRUE) 13 | 14 | target <- GRanges(chr,IRanges(st,en)) 15 | res <- sort(subsetByOverlaps(get(gr), target)) 16 | 17 | write.table(as.data.frame(res), sep="\t", quote=FALSE) 18 | -------------------------------------------------------------------------------- /container/apptainer/tagdust.def: -------------------------------------------------------------------------------- 1 | BootStrap: docker 2 | From: ubuntu:18.04 3 | 4 | %environment 5 | export LC_ALL=C 6 | 7 | %post 8 | apt-get update && \ 9 | apt-get -y install \ 10 | build-essential \ 11 | autoconf \ 12 | git 13 | apt-get clean 14 | 15 | cd ~ 16 | git clone https://github.com/TimoLassmann/tagdust.git 17 | cd tagdust 18 | ./autogen.sh 19 | ./configure 20 | make 21 | make check 22 | make install 23 | 24 | %runscript 25 | tagdust "$@" 26 | -------------------------------------------------------------------------------- /container/apptainer/plink20.def: -------------------------------------------------------------------------------- 1 | BootStrap: docker 2 | From: ubuntu:22.04 3 | 4 | %environment 5 | export LC_ALL=C 6 | 7 | %post 8 | apt-get update && \ 9 | apt-get -y install \ 10 | unzip \ 11 | wget 12 | apt-get clean 13 | 14 | cd ~ 15 | wget 
https://s3.amazonaws.com/plink2-assets/plink2_linux_x86_64_20231123.zip 16 | unzip plink2_linux_x86_64_20231123.zip 17 | rm plink2_linux_x86_64_20231123.zip 18 | cp plink2 /usr/local/bin/ 19 | 20 | %runscript 21 | plink2 "$@" 22 | -------------------------------------------------------------------------------- /container/apptainer/kallisto.def: -------------------------------------------------------------------------------- 1 | BootStrap: docker 2 | From: ubuntu:22.04 3 | 4 | %environment 5 | export LC_ALL=C 6 | 7 | %post 8 | apt-get update && \ 9 | apt-get -y install \ 10 | build-essential \ 11 | cmake \ 12 | zlib1g-dev \ 13 | libhdf5-dev \ 14 | git 15 | apt-get clean 16 | 17 | cd ~ 18 | git clone https://github.com/pachterlab/kallisto.git 19 | cd kallisto 20 | mkdir build 21 | cd build 22 | cmake .. 23 | make 24 | make install 25 | 26 | %runscript 27 | kallisto "$@" 28 | -------------------------------------------------------------------------------- /pipeline/runCmd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH -p main 3 | #SBATCH --mail-type=ALL 4 | 5 | set -ex 6 | 7 | usage(){ 8 | echo >&2 \ 9 | " 10 | Usage: $(basename $0)