├── .gitignore ├── script ├── run_kallistoindex.sh ├── run_genotypingkallisto.sh ├── run_bam2fq.sh ├── write_pers_index.R ├── calc_starindex_params.R ├── run_starindex.sh ├── genotype_kallisto.R ├── write_winners.R ├── run_quantkallisto.sh ├── write_top5_fasta.R ├── run_quantSalmonReads.sh ├── write_final_genotypes.R ├── run_quant.sh ├── run_bam2fq_mhc.sh ├── run_genotyping.sh └── make_index_files.R ├── README.Rmd ├── README.md ├── hlapers └── license.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.tsv 2 | *.txt 3 | *.pbs 4 | index 5 | fastq 6 | gencode 7 | IMGTHLA 8 | results* 9 | 10 | -------------------------------------------------------------------------------- /script/run_kallistoindex.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | transcripts=$1 4 | out=$2 5 | 6 | kallisto index -i $out $transcripts 7 | -------------------------------------------------------------------------------- /script/run_genotypingkallisto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR=$( dirname "$0" ) 4 | index=$1 5 | fq1=$2 6 | fq2=$3 7 | out=$4 8 | cpus=$5 9 | 10 | kallisto quant -i $index -t $cpus -o $out $fq1 $fq2 11 | 12 | Rscript $DIR/genotype_kallisto.R $out 13 | -------------------------------------------------------------------------------- /script/run_bam2fq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | bam=$1 4 | outPrefix=$2 5 | 6 | fq1=${outPrefix}_1.fq.gz 7 | fq2=${outPrefix}_2.fq.gz 8 | 9 | if [[ ! 
-f "$bam".bai ]]; then 10 | samtools index $bam $bam.bai 11 | fi 12 | 13 | samtools sort -n $bam |\ 14 | samtools fastq -N -1 $fq1 -2 $fq2 -0 /dev/null - 15 | 16 | -------------------------------------------------------------------------------- /script/write_pers_index.R: -------------------------------------------------------------------------------- 1 | library(hlaseqlib) 2 | 3 | opts <- commandArgs(TRUE) 4 | transcripts<- opts[1] 5 | typings <- opts[2] 6 | outPrefix <- opts[3] 7 | 8 | outindex <- paste0(outPrefix, "_index.fa") 9 | 10 | index <- Biostrings::readDNAStringSet(transcripts) 11 | 12 | typings_df <- readr::read_tsv(typings) 13 | 14 | alleles <- unlist(strsplit(unique(typings_df$allele), "-")) 15 | 16 | Biostrings::writeXStringSet(index[alleles], outindex) 17 | -------------------------------------------------------------------------------- /script/calc_starindex_params.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(Biostrings)) 2 | 3 | opts <- commandArgs(TRUE) 4 | index <- opts[1] 5 | outprefix <- opts[2] 6 | 7 | out <- file.path(outprefix, "indexparams.txt") 8 | 9 | gen <- readDNAStringSet(index) 10 | 11 | genlen <- sum(width(gen)) 12 | 13 | nrefs <- length(gen) 14 | 15 | binbits <- floor(min(18, log2(genlen/nrefs))) 16 | saindex <- floor(min(14, log2(genlen)/2 - 1)) 17 | 18 | writeLines(as.character(c(binbits, saindex)), out) 19 | -------------------------------------------------------------------------------- /script/run_starindex.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR=$( dirname "$0" ) 4 | transcripts=$1 5 | out=$2 6 | threads=$3 7 | 8 | mkdir -p $out 9 | 10 | Rscript $DIR/calc_starindex_params.R $transcripts $out 11 | 12 | binbits=$(awk 'FNR == 1' $out/indexparams.txt) 13 | saindex=$(awk 'FNR == 2' $out/indexparams.txt) 14 | 15 | STAR --runThreadN $threads --runMode genomeGenerate \ 16 | 
--genomeDir $out --genomeFastaFiles $transcripts \ 17 | --genomeChrBinNbits $binbits --genomeSAindexNbases $saindex 18 | -------------------------------------------------------------------------------- /script/genotype_kallisto.R: -------------------------------------------------------------------------------- 1 | library(hlaseqlib) 2 | suppressPackageStartupMessages(library(dplyr)) 3 | suppressPackageStartupMessages(library(readr)) 4 | 5 | opts <- commandArgs(TRUE) 6 | outPrefix <- opts[1] 7 | 8 | abundance_file <- file.path(outPrefix, "abundance.tsv") 9 | outgenos <- file.path(outPrefix, "genotypes.tsv") 10 | 11 | genos <- abundance_file %>% 12 | read_tsv() %>% 13 | filter(grepl("^IMGT_", target_id)) %>% 14 | mutate(locus = sub("^IMGT_([^\\*]+).+$", "\\1", target_id)) %>% 15 | select(locus, allele = target_id, counts = est_counts, tpm) %>% 16 | hla_genotype(th = 0.15) %>% 17 | filter(!is.na(allele)) 18 | 19 | write_tsv(genos, outgenos) 20 | -------------------------------------------------------------------------------- /script/write_winners.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(dplyr)) 2 | suppressPackageStartupMessages(library(readr)) 3 | 4 | opts <- commandArgs(TRUE) 5 | top5_quants <- opts[1] 6 | out <- opts[2] 7 | 8 | quants <- top5_quants %>% 9 | read_tsv() %>% 10 | mutate(locus = sub("^IMGT_([^\\*]+).+$", "\\1", Name), 11 | lineage = sub("^IMGT_([^:]+).*$", "\\1", Name)) %>% 12 | select(locus, lineage, allele = Name, counts = NumReads, tpm = TPM) 13 | 14 | winner_alleles <- quants %>% 15 | group_by(locus) %>% 16 | slice(which.max(counts)) %>% 17 | ungroup() %>% 18 | distinct(allele) %>% 19 | pull(allele) 20 | 21 | writeLines(winner_alleles, out) 22 | -------------------------------------------------------------------------------- /script/run_quantkallisto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR=$( 
dirname "$0") 4 | hladb=$1 5 | genos=$2 6 | fq1=$3 7 | fq2=$4 8 | outPrefix=$5 9 | cpus=$6 10 | 11 | transcripts=$hladb/transcripts_MHC_HLAsupp.fa 12 | 13 | Rscript $DIR/write_pers_index.R $transcripts $genos $outPrefix 14 | 15 | transcriptsNoHLA=$hladb/transcripts_noHLA.fa 16 | samplehla=${outPrefix}_index.fa 17 | sample_transcripts=${outPrefix}_transcripts.fa 18 | 19 | cat $transcriptsNoHLA $samplehla > $sample_transcripts 20 | 21 | index=${outPrefix}_index 22 | out=${outPrefix}_quant 23 | 24 | kallisto index -i $index $sample_transcripts 25 | 26 | kallisto quant -i $index -t $cpus -o $out --bias $fq1 $fq2 27 | 28 | awk 'FNR == 1 {print $1"\t"$4"\t"$5}' $out/abundance.tsv > ${outPrefix}_hlaquant.tsv 29 | awk '/IMGT/ {print $1"\t"$4"\t"$5}' $out/abundance.tsv >> ${outPrefix}_hlaquant.tsv 30 | 31 | rm $samplehla $sample_transcripts $index 32 | -------------------------------------------------------------------------------- /script/write_top5_fasta.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(dplyr)) 2 | suppressPackageStartupMessages(library(readr)) 3 | 4 | opts <- commandArgs(TRUE) 5 | quant_file <- opts[1] 6 | gencode <- opts[2] 7 | out <- opts[3] 8 | 9 | index <- Biostrings::readDNAStringSet(gencode) 10 | 11 | imgt_quants <- read_tsv(quant_file) %>% 12 | filter(grepl("^IMGT", Name)) %>% 13 | mutate(lineage = sub("^IMGT_([^:]+).*$", "\\1", Name), 14 | locus = sub("^([^\\*]+).+$", "\\1", lineage)) %>% 15 | select(locus, lineage, allele = Name, est_counts = NumReads, tpm = TPM) 16 | 17 | top_alleles <- imgt_quants %>% 18 | group_by(locus) %>% 19 | top_n(5, est_counts) %>% 20 | ungroup() %>% 21 | group_by(locus, lineage) %>% 22 | filter(tpm/max(tpm) > 0.25) %>% 23 | ungroup() %>% 24 | pull(allele) %>% 25 | unique() 26 | 27 | Biostrings::writeXStringSet(index[top_alleles], out) 28 | -------------------------------------------------------------------------------- 
/script/run_quantSalmonReads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR=$( dirname "$0" ) 4 | hladb=$1 5 | genos=$2 6 | fq1=$3 7 | fq2=$4 8 | outPrefix=$5 9 | cpus=$6 10 | 11 | out=${outPrefix}_quant 12 | index=${outPrefix}_index 13 | transcripts=$hladb/transcripts_MHC_HLAsupp.fa 14 | transcriptsNoHLA=$hladb/transcripts_noHLA.fa 15 | samplehla=${outPrefix}_index.fa 16 | sample_transcripts=${outPrefix}_transcripts.fa 17 | 18 | mkdir -p ${outPrefix}_log 19 | 20 | Rscript $DIR/write_pers_index.R $transcripts $genos $outPrefix 21 | 22 | cat $transcriptsNoHLA $samplehla > $sample_transcripts 23 | 24 | salmon index -t $sample_transcripts -i $index 25 | 26 | salmon quant -i $index -l A -1 $fq1 -2 $fq2 -o $out -p $cpus \ 27 | --seqBias --gcBias --posBias 28 | 29 | awk 'FNR == 1 {print $1"\t"$4"\t"$5}' $out/quant.sf > ${outPrefix}_hlaquant.tsv 30 | awk '/IMGT/ {print $1"\t"$4"\t"$5}' $out/quant.sf >> ${outPrefix}_hlaquant.tsv 31 | 32 | rm -r $index $samplehla $sample_transcripts 33 | -------------------------------------------------------------------------------- /script/write_final_genotypes.R: -------------------------------------------------------------------------------- 1 | library(hlaseqlib) 2 | suppressPackageStartupMessages(library(dplyr)) 3 | suppressPackageStartupMessages(library(readr)) 4 | 5 | opts <- commandArgs(TRUE) 6 | transcripts<- opts[1] 7 | quants_1st <- opts[2] 8 | quants_2nd <- opts[3] 9 | outPrefix <- opts[4] 10 | 11 | outgenos <- paste0(outPrefix, "_genotypes.tsv") 12 | 13 | index <- Biostrings::readDNAStringSet(transcripts) 14 | 15 | typings_1st <- quants_1st %>% 16 | read_tsv() %>% 17 | mutate(locus = sub("^IMGT_([^\\*]+).+$", "\\1", Name)) %>% 18 | select(locus, allele = Name, counts = NumReads, tpm = TPM) %>% 19 | group_by(locus) %>% 20 | slice(which.max(counts)) %>% 21 | ungroup() 22 | 23 | if (file.exists(quants_2nd)) { 24 | 25 | typings_2nd <- quants_2nd %>% 26 | read_tsv() 
%>% 27 | mutate(locus = sub("^IMGT_([^\\*]+).+$", "\\1", Name)) %>% 28 | select(locus, allele = Name, counts = NumReads, tpm = TPM) %>% 29 | group_by(locus) %>% 30 | slice(which.max(counts)) %>% 31 | ungroup() %>% 32 | filter(counts > 0) 33 | 34 | typings_df <- bind_rows(typings_1st, typings_2nd) %>% 35 | arrange(locus) %>% 36 | hla_genotype(th = 0.05) 37 | 38 | } else { 39 | 40 | typings_df <- typings_1st %>% 41 | arrange(locus) %>% 42 | hla_genotype(th = 0.05) 43 | } 44 | 45 | typings_df %>% 46 | filter(!is.na(allele)) %>% 47 | select(locus, allele) %>% 48 | write_tsv(outgenos) 49 | -------------------------------------------------------------------------------- /script/run_quant.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR=$( dirname "$0" ) 4 | hladb=$1 5 | genos=$2 6 | fq1=$3 7 | fq2=$4 8 | outPrefix=$5 9 | cpus=$6 10 | 11 | 12 | transcripts=$hladb/transcripts_MHC_HLAsupp.fa 13 | 14 | Rscript $DIR/write_pers_index.R $transcripts $genos $outPrefix 15 | 16 | transcriptsNoHLA=$hladb/transcripts_noHLA.fa 17 | samplehla=${outPrefix}_index.fa 18 | sample_transcripts=${outPrefix}_transcripts.fa 19 | 20 | cat $transcriptsNoHLA $samplehla > $sample_transcripts 21 | 22 | index=${outPrefix}_index 23 | bampers=${outPrefix}_Aligned.out.bam 24 | out=${outPrefix}_quant 25 | 26 | mkdir -p $index 27 | mkdir -p ${outPrefix}_log 28 | 29 | STAR --runThreadN $cpus --runMode genomeGenerate --genomeDir $index\ 30 | --genomeFastaFiles $sample_transcripts\ 31 | --genomeChrBinNbits 11 --genomeSAindexNbases 13\ 32 | --outFileNamePrefix ${index}_ 33 | 34 | STAR --runMode alignReads --runThreadN $cpus --genomeDir $index\ 35 | --readFilesIn $fq1 $fq2 --readFilesCommand zcat\ 36 | --outFilterMismatchNmax 999\ 37 | --outFilterMismatchNoverReadLmax 0.04\ 38 | --outFilterMultimapScoreRange 1\ 39 | --outFilterMultimapNmax 150\ 40 | --winAnchorMultimapNmax 300\ 41 | --alignIntronMax 0\ 42 | --alignEndsType Local\ 43 | 
--outSAMprimaryFlag AllBestScore\ 44 | --outSAMtype BAM Unsorted\ 45 | --outFileNamePrefix ${outPrefix}_ 46 | 47 | salmon quant -t $sample_transcripts -l A -a $bampers -o $out -p $cpus \ 48 | --seqBias --gcBias --posBias 49 | 50 | awk 'FNR == 1 {print $1"\t"$4"\t"$5}' $out/quant.sf > ${outPrefix}_hlaquant.tsv 51 | awk '/IMGT/ {print $1"\t"$4"\t"$5}' $out/quant.sf >> ${outPrefix}_hlaquant.tsv 52 | 53 | mv ${outPrefix}_*Log.* ${outPrefix}_log/ 54 | 55 | rm -r $bampers $index $sample_transcripts $samplehla ${outPrefix}_SJ* 56 | -------------------------------------------------------------------------------- /script/run_bam2fq_mhc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | bam=$1 4 | mhccoords=$2 5 | outPrefix=$3 6 | dosorting=$4 7 | 8 | mhc=$(cat $mhccoords) 9 | 10 | mapbam=${outPrefix}_map.bam 11 | unmapbam=${outPrefix}_unmap.bam 12 | tmpbam=${outPrefix}_tmp.bam 13 | sortbam=${outPrefix}_tmpsorted.bam 14 | tmpfq1=${outPrefix}_tmp_1.fq 15 | tmpfq2=${outPrefix}_tmp_2.fq 16 | reads1tmp=${outPrefix}_reads1.tmp 17 | reads2tmp=${outPrefix}_reads2.tmp 18 | reads=${outPrefix}_reads 19 | reads1=${outPrefix}_reads1 20 | reads2=${outPrefix}_reads2 21 | finalfq1=${outPrefix}_mhc_unmap_1.fq 22 | finalfq2=${outPrefix}_mhc_unmap_2.fq 23 | 24 | if [[ "$dosorting" -eq 1 ]]; then 25 | echo "Sorting BAM file..." 26 | samtools sort -o $sortbam $bam 27 | else 28 | sortbam=$bam 29 | fi 30 | 31 | echo "Extracting MHC and unmapped reads from BAM..." 32 | 33 | if [[ ! 
-f "$sortbam".bai ]]; then 34 | samtools index $sortbam ${sortbam}.bai 35 | fi 36 | 37 | samtools view $sortbam $mhc -b -o $mapbam 38 | samtools view -F 0x2 $sortbam -b -o $unmapbam 39 | samtools merge $tmpbam $mapbam $unmapbam 40 | 41 | if [[ -f "${outPrefix}"_tmpsorted.bam ]]; then 42 | rm ${outPrefix}_tmpsorted.bam 43 | fi 44 | 45 | samtools sort -n $tmpbam | samtools fastq -1 $tmpfq1 -2 $tmpfq2 -0 /dev/null - 46 | 47 | sed -n '1~4p' $tmpfq1 | sed 's|^@||' | sed 's|/1$||'| sort > $reads1tmp 48 | sed -n '1~4p' $tmpfq2 | sed 's|^@||' | sed 's|/2$||'| sort > $reads2tmp 49 | 50 | comm -12 $reads1tmp $reads2tmp | sort -V | uniq > $reads 51 | 52 | if [[ $(head -n1 $tmpfq1) =~ /1$ ]] && [[ $(head -n1 $tmpfq2) =~ /2$ ]]; then 53 | 54 | awk '{ print $0 "/1" }' $reads > $reads1 55 | awk '{ print $0 "/2" }' $reads > $reads2 56 | 57 | else 58 | 59 | cp $reads $reads1 60 | cp $reads $reads2 61 | 62 | fi 63 | 64 | echo "Writing fastq files..." 65 | 66 | seqtk subseq $tmpfq1 $reads1 > $finalfq1 67 | seqtk subseq $tmpfq2 $reads2 > $finalfq2 68 | 69 | rm $mapbam $unmapbam $tmpbam $tmpfq1 $tmpfq2 $reads1tmp $reads2tmp $reads $reads1 $reads2 70 | 71 | echo "Done!" 
72 | -------------------------------------------------------------------------------- /script/run_genotyping.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR=$( dirname "$0" ) 4 | index=$1 5 | transcripts=$2 6 | fq1=$3 7 | fq2=$4 8 | outPrefix=$5 9 | cpus=$6 10 | 11 | bammhc=${outPrefix}_MHC_Aligned.out.bam 12 | outmhc=${outPrefix}_MHC_quants 13 | persindex=${outPrefix}_persindex 14 | outtop5=${outPrefix}_top5_quants 15 | readsWin=${outPrefix}_readsWin.txt 16 | readsNoWin=${outPrefix}_readsNoWin.txt 17 | fqnoWin1=${outPrefix}_noWin_1.fq 18 | fqnoWin2=${outPrefix}_noWin_2.fq 19 | outNoWin=${outPrefix}_NoWin_quants 20 | 21 | if file $fq1 | grep -q gzip ; then 22 | readcommand=zcat 23 | else 24 | readcommand="-" 25 | fi 26 | 27 | # Remap to supplemented index 28 | echo "Remapping extracted reads to personalized MHC index..." 29 | 30 | STAR --runMode alignReads --runThreadN $cpus --genomeDir $index\ 31 | --readFilesIn $fq1 $fq2 --readFilesCommand $readcommand\ 32 | --outFilterMismatchNmax 1\ 33 | --outFilterMultimapScoreRange 0\ 34 | --outFilterMultimapNmax 3000\ 35 | --winAnchorMultimapNmax 6000\ 36 | --alignEndsType EndToEnd\ 37 | --outSAMprimaryFlag AllBestScore\ 38 | --outSAMtype BAM Unsorted\ 39 | --outFileNamePrefix ${outPrefix}_MHC_ 40 | 41 | # Quantify MHC expression 42 | echo "Genotyping HLA..." 
43 | 44 | salmon quant -t $transcripts -l A -a $bammhc -o $outmhc -p $cpus 45 | 46 | #Extract up to top 5 HLA alleles 47 | mkdir -p $persindex 48 | 49 | Rscript $DIR/write_top5_fasta.R $outmhc/quant.sf $transcripts $persindex/hla.fa 50 | 51 | #Requantify expression of the top5 52 | mkdir -p $outtop5 53 | 54 | salmon index -t $persindex/hla.fa -i $persindex/salmon 55 | 56 | salmon quant -i $persindex/salmon -l A -1 $fq1 -2 $fq2 -o $outtop5\ 57 | -p $cpus --writeMappings=$outtop5/mappings.sam 58 | 59 | Rscript $DIR/write_winners.R $outtop5/quant.sf $outtop5/winners.txt 60 | 61 | #Remove reads from the winner alleles 62 | samtools view $outtop5/mappings.sam |\ 63 | grep -F -f $outtop5/winners.txt - |\ 64 | cut -f1 |\ 65 | sort |\ 66 | uniq > $readsWin 67 | 68 | samtools view $outtop5/mappings.sam |\ 69 | cut -f1 |\ 70 | awk 'FNR==NR {hash[$0]; next} !($0 in hash)' $readsWin - |\ 71 | sort |\ 72 | uniq > $readsNoWin 73 | 74 | if [[ $(head -n1 $fq1) =~ /1$ ]] && [[ $(head -n1 $fq2) =~ /2$ ]]; then 75 | 76 | awk '{ print $0 "/1" }' $readsNoWin > ${readsNoWin}1 77 | awk '{ print $0 "/2" }' $readsNoWin > ${readsNoWin}2 78 | 79 | seqtk subseq $fq1 ${readsNoWin}1 > $fqnoWin1 80 | seqtk subseq $fq2 ${readsNoWin}2 > $fqnoWin2 81 | 82 | else 83 | 84 | seqtk subseq $fq1 $readsNoWin > $fqnoWin1 85 | seqtk subseq $fq2 $readsNoWin > $fqnoWin2 86 | 87 | fi 88 | 89 | #Requantify to see if winner alleles explain all the expression or if 90 | # there is another relevant allele 91 | 92 | if [[ -s "$fqnoWin1" ]] && [[ -s "$fqnoWin2" ]]; then 93 | salmon quant -i $persindex/salmon -l A -1 $fqnoWin1 -2 $fqnoWin2\ 94 | -o $outNoWin -p $cpus 95 | fi 96 | 97 | #Final genotypes 98 | Rscript $DIR/write_final_genotypes.R $transcripts $outtop5/quant.sf $outNoWin/quant.sf $outPrefix 99 | 100 | mkdir -p ${outPrefix}_log 101 | 102 | mv ${outPrefix}_MHC_Log* ${outPrefix}_log/ 103 | mv ${outPrefix}_MHC_quants/logs/salmon_quant.log ${outPrefix}_log/ 104 | 105 | rm -r ${outPrefix}_MHC* $persindex 
$outtop5 $readsWin\ 106 | ${readsNoWin}* $fqnoWin1 $fqnoWin2 $outNoWin 107 | 108 | echo "Done!" 109 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "" 3 | output: github_document 4 | --- 5 | 6 | ```{r setup, include=FALSE} 7 | knitr::opts_chunk$set(echo = TRUE, comment = "", engine.opts = list(bash = "-l")) 8 | ``` 9 | 10 | # HLApers 11 | 12 | ## License 13 | 14 | HLApers integrates software such as kallisto, Salmon and STAR. Before using it, please read the license notices [here](https://github.com/genevol-usp/HLApers/blob/Latest/license.txt) 15 | 16 | ## Getting started 17 | 18 | ### Install required software 19 | 20 | ##### 1. HLApers 21 | 22 | ``` 23 | git clone https://github.com/genevol-usp/HLApers.git 24 | ``` 25 | 26 | ##### 2. R v3.4+ 27 | 28 | ##### 3. In R, install the following packages 29 | 30 | - from Bioconductor: 31 | 32 | ``` 33 | if (!requireNamespace("BiocManager", quietly = TRUE)) 34 | install.packages("BiocManager") 35 | 36 | BiocManager::install("Biostrings") 37 | ``` 38 | 39 | - from GitHub: 40 | 41 | ``` 42 | if (!requireNamespace("devtools", quietly = TRUE)) 43 | install.packages("devtools") 44 | 45 | devtools::install_github("genevol-usp/hlaseqlib") 46 | ``` 47 | 48 | ##### 4. For STAR-Salmon-based pipeline, install: 49 | 50 | - STAR v2.5.3a+ 51 | 52 | - Salmon v0.8.2+ 53 | 54 | - samtools 1.3+ 55 | 56 | - seqtk 57 | 58 | 59 | ##### 5. For kallisto-based pipeline, install: 60 | 61 | - kallisto 62 | 63 | 64 | ### Download data: 65 | 66 | 67 | ##### 1. IMGT database 68 | 69 | ``` 70 | git clone https://github.com/ANHIG/IMGTHLA.git 71 | ``` 72 | 73 | ##### 2. 
Gencode: 74 | 75 | - transcripts fasta (e.g., Gencode v37 fasta) 76 | 77 | - corresponding annotations GTF (e.g., Gencode v37 GTF) 78 | 79 | 80 | 81 | ## HLApers usage 82 | 83 | Link the hlapers executable in your execution path, or change to the HLApers directory and execute the program with `./hlapers`. 84 | 85 | 86 | ### Getting help 87 | 88 | HLApers is composed of the following modes: 89 | 90 | ```{bash} 91 | hlapers --help 92 | ``` 93 | 94 | 95 | ### 1. Building a transcriptome supplemented with HLA sequences 96 | 97 | The first step is to use `hlapers prepare-ref` to build an index composed of 98 | Gencode transcripts, where we replace the HLA transcripts with IMGT HLA allele 99 | sequences. 100 | 101 | ```{bash} 102 | hlapers prepare-ref --help 103 | ``` 104 | 105 | Example: 106 | 107 | ``` 108 | hlapers prepare-ref -t gencode.v37.transcripts.fa.gz -a gencode.v37.annotation.gtf.gz -i IMGTHLA -o hladb 109 | ``` 110 | 111 | ### 2. Creating an index for read alignment 112 | 113 | ```{bash} 114 | hlapers index --help 115 | ``` 116 | 117 | Example: 118 | 119 | ``` 120 | hlapers index -t hladb/transcripts_MHC_HLAsupp.fa -p 4 -o index 121 | ``` 122 | 123 | ### 3. HLA genotyping 124 | 125 | Given a BAM file from a previous alignment to the genome, we first need to extract the reads mapped to the MHC region and those which are unmapped. For this, we can use the `bam2fq` utility. 126 | 127 | ```{bash} 128 | hlapers bam2fq --help 129 | ``` 130 | 131 | Example: 132 | 133 | ``` 134 | hlapers bam2fq -b HG00096.bam -m ./hladb/mhc_coords.txt -o HG00096 135 | ``` 136 | 137 | Then we run the genotyping module. 138 | 139 | ```{bash} 140 | hlapers genotype --help 141 | ``` 142 | 143 | Example: 144 | 145 | ``` 146 | hlapers genotype -i index/STARMHC -t ./hladb/transcripts_MHC_HLAsupp.fa -1 HG00096_mhc_1.fq -2 HG00096_mhc_2.fq -p 8 -o results/HG00096 147 | ``` 148 | 149 | 150 | ### 4. Quantify HLA expression 151 | 152 | In order to quantify expression, we use the `quant` module. 
If the original fastq files are available, we can proceed directly to the quantification step. If only a BAM file of a previous alignment to the genome is available, we first need to convert the BAM to fastq using the `bam2fq` utility. 153 | 154 | Example: 155 | 156 | ``` 157 | hlapers bam2fq -b HG00096.bam -o HG00096 158 | ``` 159 | 160 | Proceed to the quantification step. 161 | 162 | 163 | ```{bash} 164 | hlapers quant --help 165 | ``` 166 | 167 | Example: 168 | 169 | ``` 170 | hlapers quant -t ./hladb -g ./results/HG00096_genotypes.tsv -1 HG00096_1.fq.gz -2 HG00096_2.fq.gz -o ./results/HG00096 -p 8 171 | ``` 172 | 173 | -------------------------------------------------------------------------------- /script/make_index_files.R: -------------------------------------------------------------------------------- 1 | library(hlaseqlib) 2 | suppressPackageStartupMessages(library(Biostrings)) 3 | suppressPackageStartupMessages(library(dplyr)) 4 | suppressPackageStartupMessages(library(purrr)) 5 | suppressPackageStartupMessages(library(readr)) 6 | suppressPackageStartupMessages(library(tidyr)) 7 | 8 | # inputs 9 | opts <- commandArgs(TRUE) 10 | transcript_fasta <- opts[1] 11 | transcript_annot <- opts[2] 12 | imgt_db <- opts[3] 13 | out <- opts[4] 14 | 15 | # outputs 16 | out_noHLA <- file.path(out, "transcripts_noHLA.fa") 17 | out_supp <- file.path(out, "transcripts_HLAsupp.fa") 18 | out_MHCsupp <- file.path(out, "transcripts_MHC_HLAsupp.fa") 19 | out_coord <- file.path(out, "mhc_coords.txt") 20 | 21 | if (!file.exists(out)) dir.create(out) 22 | 23 | # HLA database 24 | imgt_loci <- c("A", "B", "C", "E", "F", "G", "H", 25 | "DMA", "DMB", "DOA", "DOB", 26 | "DPA1", "DPA2", "DPB1", "DPB2", 27 | "DQA1", "DQA2", "DQB1", 28 | "DRA", "DRB1", "DRB3", "DRB4", "DRB5") 29 | 30 | present_loci <- 31 | list.files(file.path(imgt_db, "alignments"), full.names = TRUE) %>% 32 | .[grep("nuc", .)] %>% 33 | .[!grepl("(Class)|(HFE)|(TAP)|(MIC)", .)] %>% 34 | map(. 
%>% 35 | readLines() %>% 36 | trimws() %>% 37 | .[grep("^[A-Z1-9]+\\*\\d+:", .)] %>% 38 | sub("^([^*]+).*$", "\\1", .) %>% 39 | unique()) %>% 40 | unlist() 41 | 42 | imgt_loci_inc <- imgt_loci[imgt_loci %in% present_loci] 43 | 44 | message(paste("HLApers found data for the following loci, which will be personalized:", 45 | paste(imgt_loci_inc, collapse = ", "))) 46 | 47 | hladb <- tibble(locus = imgt_loci_inc) %>% 48 | mutate(data = map(locus, ~hla_compile_index(., imgt_db))) %>% 49 | filter(!is.na(data)) %>% 50 | unnest(data) %>% 51 | filter(!grepl("N$", allele)) %>% 52 | select(-locus) %>% 53 | mutate(allele = paste0("IMGT_", allele)) %>% 54 | split(.$allele) %>% 55 | map_chr("cds") %>% 56 | DNAStringSet() 57 | 58 | hladb_genes <- unique(sub("^IMGT_([^\\*]+).+$", "HLA-\\1", names(hladb))) 59 | 60 | # Annotations 61 | message("Reading transcript annotations...") 62 | g_annot <- read_tsv(transcript_annot, comment = "#", col_names = FALSE, 63 | col_types = "ccciicccc", progress = FALSE) 64 | 65 | transcripts_db <- g_annot %>% 66 | filter(X3 == "transcript") %>% 67 | mutate(gene_name = sub("^.*gene_name \"([^\"]+)\";.*$", "\\1", X9), 68 | gene_id = sub("^.*gene_id \"([^\"]+)\";.*$", "\\1", X9), 69 | transcript_id = sub("^.*transcript_id \"([^\"]+)\";.*$", "\\1", X9)) %>% 70 | select(chr = X1, start = X4, end = X5, gene_name, gene_id, transcript_id) 71 | 72 | mhc_coords <- transcripts_db %>% 73 | filter(chr == "chr6" | chr == 6, gene_name %in% hladb_genes) %>% 74 | summarise(chr = unique(chr), start = min(start) -5e5, end = max(end) + 5e5) 75 | 76 | mhc_coords %>% 77 | mutate(out = paste0(chr, ":", start, "-", end)) %>% 78 | pull(out) %>% 79 | writeLines(out_coord) 80 | 81 | # Transcript sequences 82 | transcripts <- readDNAStringSet(transcript_fasta) %>% 83 | `names<-`(sub("^([^\\|]+).*$", "\\1", names(.))) 84 | 85 | transcripts_no_hla <- transcripts_db %>% 86 | filter(! gene_name %in% hladb_genes) %>% 87 | pull(transcript_id) %>% 88 | transcripts[.] 
89 | 90 | transcripts_hlasupp <- c(transcripts_no_hla, hladb) 91 | 92 | mhc_transc_ids <- transcripts_db %>% 93 | filter(chr == "chr6" | chr == 6, start >= mhc_coords$start, end <= mhc_coords$end, 94 | transcript_id %in% names(transcripts_no_hla)) %>% 95 | pull(transcript_id) 96 | 97 | transcripts_mhc <- 98 | tibble(tx_id = names(transcripts_no_hla), 99 | cds = as.character(transcripts_no_hla)) %>% 100 | filter(tx_id %in% mhc_transc_ids) %>% 101 | split(.$tx_id) %>% 102 | map_chr("cds") %>% 103 | DNAStringSet() 104 | 105 | transcripts_mhc_supp <- c(transcripts_mhc, hladb) 106 | 107 | message("writing index files...") 108 | writeXStringSet(transcripts_hlasupp, out_supp) 109 | writeXStringSet(transcripts_no_hla, out_noHLA) 110 | writeXStringSet(transcripts_mhc_supp, out_MHCsupp) 111 | 112 | message("Done!") 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # HLApers 3 | 4 | ## License 5 | 6 | HLApers integrates software such as kallisto, Salmon and STAR. Before 7 | using it, please read the license notices 8 | [here](https://github.com/genevol-usp/HLApers/blob/Latest/license.txt) 9 | 10 | ## Getting started 11 | 12 | ### Install required software 13 | 14 | ##### 1\. HLApers 15 | 16 | git clone https://github.com/genevol-usp/HLApers.git 17 | 18 | ##### 2\. R v3.4+ 19 | 20 | ##### 3\. In R, install the following packages 21 | 22 | - from Bioconductor: 23 | 24 | 25 | 26 | if (!requireNamespace("BiocManager", quietly = TRUE)) 27 | install.packages("BiocManager") 28 | 29 | BiocManager::install("Biostrings") 30 | 31 | - from GitHub: 32 | 33 | 34 | 35 | if (!requireNamespace("devtools", quietly = TRUE)) 36 | install.packages("devtools") 37 | 38 | devtools::install_github("genevol-usp/hlaseqlib") 39 | 40 | ##### 4\. 
For STAR-Salmon-based pipeline, install: 41 | 42 | - STAR v2.5.3a+ 43 | 44 | - Salmon v0.8.2+ 45 | 46 | - samtools 1.3+ 47 | 48 | - seqtk 49 | 50 | ##### 5\. For kallisto-based pipeline, install: 51 | 52 | - kallisto 53 | 54 | ### Download data: 55 | 56 | ##### 1\. IMGT database 57 | 58 | git clone https://github.com/ANHIG/IMGTHLA.git 59 | 60 | ##### 2\. Gencode: 61 | 62 | - transcripts fasta (e.g., Gencode v37 fasta) 63 | 64 | - corresponding annotations GTF (e.g., Gencode v37 GTF) 65 | 66 | ## HLApers usage 67 | 68 | Link the hlapers executable in your execution path, or change to the 69 | HLApers directory and execute the program with `./hlapers`. 70 | 71 | ### Getting help 72 | 73 | HLApers is composed of the following modes: 74 | 75 | ``` bash 76 | hlapers --help 77 | ``` 78 | 79 | Usage: hlapers [modes] 80 | 81 | prepare-ref Prepare transcript fasta files. 82 | index Create index for read alignment. 83 | bam2fq Convert BAM to fastq. 84 | genotype Infer HLA genotypes. 85 | quant Quantify HLA expression. 86 | 87 | ### 1\. Building a transcriptome supplemented with HLA sequences 88 | 89 | The first step is to use `hlapers prepare-ref` to build an index 90 | composed of Gencode transcripts, where we replace the HLA transcripts 91 | with IMGT HLA allele sequences. 92 | 93 | ``` bash 94 | hlapers prepare-ref --help 95 | ``` 96 | 97 | Usage: hlapers prepare-ref [options] 98 | 99 | -t | --transcripts Fasta with Gencode transcript sequences. 100 | -a | --annotations GTF from Gencode for the same Genome version. 101 | -i | --imgt Path to IMGT directory. 102 | -o | --out Output directory. 103 | 104 | Example: 105 | 106 | hlapers prepare-ref -t gencode.v37.transcripts.fa.gz -a gencode.v37.annotation.gtf.gz -i IMGTHLA -o hladb 107 | 108 | ### 2\. Creating an index for read alignment 109 | 110 | ``` bash 111 | hlapers index --help 112 | ``` 113 | 114 | Usage: hlapers index [options] 115 | 116 | -t | --transcripts Fasta with Gencode transcript sequences. 
117 | -p | --threads Number of threads. 118 | -o | --out Output directory. 119 | --kallisto Create index for kallisto pipeline instead of STARsalmon. 120 | 121 | Example: 122 | 123 | hlapers index -t hladb/transcripts_MHC_HLAsupp.fa -p 4 -o index 124 | 125 | ### 3\. HLA genotyping 126 | 127 | Given a BAM file from a previous alignment to the genome, we first need 128 | to extract the reads mapped to the MHC region and those which are 129 | unmapped. For this, we can use the `bam2fq` utility. 130 | 131 | ``` bash 132 | hlapers bam2fq --help 133 | ``` 134 | 135 | Usage: hlapers bam2fq [options] 136 | 137 | -m | --mhc-coords Genomic coordinates of the MHC region in chrN:start-end format if MHC fastq is desired. 138 | -b | --bam BAM file (if -m is specified, needs to be sorted by coordinate; otherwise use --sort). 139 | -o | --outprefix Output prefix name. 140 | --sort Sort input BAM file by coordinate (REQUIRED if -m is specified and BAM is not sorted by coordinate). 141 | 142 | Example: 143 | 144 | hlapers bam2fq -b HG00096.bam -m ./hladb/mhc_coords.txt -o HG00096 145 | 146 | Then we run the genotyping module. 147 | 148 | ``` bash 149 | hlapers genotype --help 150 | ``` 151 | 152 | Usage: hlapers genotype [options] 153 | 154 | -i | --index Index generated by 'hlapers index'. 155 | -t | --transcripts Fasta with Gencode transcripts sequences used for 'hlapers index'. 156 | -1 | --fq1 Fastq for READ 1. 157 | -2 | --fq2 Fastq for READ 2. 158 | -p | --threads Number of threads. 159 | -o | --outprefix Output prefix name. 160 | --kallisto Use kallisto for genotyping. 161 | 162 | Example: 163 | 164 | hlapers genotype -i index/STARMHC -t ./hladb/transcripts_MHC_HLAsupp.fa -1 HG00096_mhc_1.fq -2 HG00096_mhc_2.fq -p 8 -o results/HG00096 165 | 166 | ### 4\. Quantify HLA expression 167 | 168 | In order to quantify expression, we use the `quant` module. If the 169 | original fastq files are available, we can proceed directly to the 170 | quantification step. 
If only a BAM file of a previous alignment to the 171 | genome is available, we first need to convert the BAM to fastq using the 172 | `bam2fq` utility. 173 | 174 | Example: 175 | 176 | hlapers bam2fq -b HG00096.bam -o HG00096 177 | 178 | Proceed to the quantification step. 179 | 180 | ``` bash 181 | hlapers quant --help 182 | ``` 183 | 184 | Usage: hlapers quant [options] 185 | 186 | -t | --transcripts Reference transcripts directory. 187 | -g | --genotypes *_genotypes.tsv file generated by 'hlapers genotype'. 188 | -1 | --fq1 Fastq for READ 1. 189 | -2 | --fq2 Fastq for READ 2. 190 | -p | --threads Number of threads. 191 | -o | --out Output prefix name. 192 | --salmonreads Use Salmon lightweight alignment for quantification (NOT TESTED) 193 | --kallisto Use kallisto for quantification. 194 | 195 | Example: 196 | 197 | hlapers quant -t ./hladb -g ./results/HG00096_genotypes.tsv -1 HG00096_1.fq.gz -2 HG00096_2.fq.gz -o ./results/HG00096 -p 8 198 | -------------------------------------------------------------------------------- /hlapers: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # script directory: resolve symlinks where the platform allows it, otherwise use the path hlapers was invoked with 4 | if [[ "$OSTYPE" == "linux-gnu" ]]; then 5 | 6 | myexecpath="$( readlink -f "$0" )" 7 | 8 | elif [[ "$OSTYPE" == "darwin"* ]]; then 9 | 10 | myexecpath="$( readlink "$0" || echo "$0" )" 11 | 12 | else 13 | myexecpath="$0" 14 | fi 15 | 16 | execdir="$( dirname "$myexecpath" )" 17 | scriptdir=$execdir/script 18 | 19 | # Usage functions 20 | usage() { 21 | echo "Usage: hlapers [modes]" 22 | echo "" 23 | printf "%-20s %s\n" "prepare-ref" "Prepare transcript fasta files." 24 | printf "%-20s %s\n" "index" "Create index for read alignment." 25 | printf "%-20s %s\n" "bam2fq" "Convert BAM to fastq." 26 | printf "%-20s %s\n" "genotype" "Infer HLA genotypes." 27 | printf "%-20s %s\n" "quant" "Quantify HLA expression." 
28 | } 29 | 30 | usage_prepare() { 31 | echo "Usage: hlapers prepare-ref [options]" 32 | echo "" 33 | printf "%-20s %s\n" "-t | --transcripts" "Fasta with Gencode transcript sequences." 34 | printf "%-20s %s\n" "-a | --annotations" "GTF from Gencode for the same Genome version." 35 | printf "%-20s %s\n" "-i | --imgt" "Path to IMGT directory." 36 | printf "%-20s %s\n" "-o | --out" "Output directory." 37 | } 38 | 39 | usage_index() { 40 | echo "Usage: hlapers index [options]" 41 | echo "" 42 | printf "%-20s %s\n" "-t | --transcripts" "Fasta with Gencode transcript sequences." 43 | printf "%-20s %s\n" "-p | --threads" "Number of threads." 44 | printf "%-20s %s\n" "-o | --out" "Output directory." 45 | printf "%-20s %s\n" "--kallisto" "Create index for kallisto pipeline instead of STARsalmon." 46 | } 47 | 48 | usage_genotype() { 49 | echo "Usage: hlapers genotype [options]" 50 | echo "" 51 | printf "%-20s %s\n" "-i | --index" "Index generated by 'hlapers index'." 52 | printf "%-20s %s\n" "-t | --transcripts" "Fasta with Gencode transcripts sequences used for 'hlapers index'." 53 | printf "%-20s %s\n" "-1 | --fq1" "Fastq for READ 1." 54 | printf "%-20s %s\n" "-2 | --fq2" "Fastq for READ 2." 55 | printf "%-20s %s\n" "-p | --threads" "Number of threads." 56 | printf "%-20s %s\n" "-o | --outprefix" "Output prefix name." 57 | printf "%-20s %s\n" "--kallisto" "Use kallisto for genotyping." 58 | } 59 | 60 | usage_quant() { 61 | echo "Usage: hlapers quant [options]" 62 | echo "" 63 | printf "%-20s %s\n" "-t | --transcripts" "Reference transcripts directory." 64 | printf "%-20s %s\n" "-g | --genotypes" "*_genotypes.tsv file generated by 'hlapers genotype'." 65 | printf "%-20s %s\n" "-1 | --fq1" "Fastq for READ 1." 66 | printf "%-20s %s\n" "-2 | --fq2" "Fastq for READ 2." 67 | printf "%-20s %s\n" "-p | --threads" "Number of threads." 68 | printf "%-20s %s\n" "-o | --out" "Output prefix name." 
69 | printf "%-20s %s\n" "--salmonreads" "Use Salmon lightweight alignment for quantification (NOT TESTED)" 70 | printf "%-20s %s\n" "--kallisto" "Use kallisto for quantification." 71 | } 72 | 73 | usage_bam2fq() { 74 | echo "Usage: hlapers bam2fq [options]" 75 | echo "" 76 | printf "%-20s %s\n" "-m | --mhc-coords" "Genomic coordinates of the MHC region in chrN:start-end format if MHC fastq is desired." 77 | printf "%-20s %s\n" "-b | --bam" "BAM file (if -m is specified, needs to be sorted by coordinate; otherwise use --sort)." 78 | printf "%-20s %s\n" "-o | --outprefix" "Output prefix name." 79 | printf "%-20s %s\n" "--sort" "Sort input BAM file by coordinate (REQUIRED if -m is specified and BAM is not sorted by coordinate)." 80 | } 81 | 82 | # Mode functions 83 | run_prepref () { 84 | 85 | if [[ "$#" -eq 0 ]]; then 86 | usage_prepare 87 | exit 1 88 | fi 89 | 90 | while [[ "$#" -gt 0 ]]; do 91 | case "$1" in 92 | -h|--help) 93 | usage_prepare 94 | exit 95 | ;; 96 | -t|--transcripts) 97 | transcripts="$2" 98 | shift 2 99 | ;; 100 | -a|--annotations) 101 | annotations="$2" 102 | shift 2 103 | ;; 104 | -i|--imgt) 105 | imgt="$2" 106 | shift 2 107 | ;; 108 | -o|-out) 109 | out="$2" 110 | shift 2 111 | ;; 112 | *) 113 | echo "ERROR: unknown parameter $1" 114 | usage_prepare 115 | exit 1 116 | ;; 117 | esac 118 | done 119 | 120 | Rscript $scriptdir/make_index_files.R $transcripts $annotations $imgt $out 121 | } 122 | 123 | 124 | run_index() { 125 | 126 | if [[ "$#" -eq 0 ]]; then 127 | usage_index 128 | exit 1 129 | fi 130 | 131 | while [[ "$#" -gt 0 ]]; do 132 | case "$1" in 133 | -h|--help) 134 | usage_index 135 | exit 136 | ;; 137 | -t|--transcripts) 138 | transcripts="$2" 139 | shift 2 140 | ;; 141 | -p|--threads) 142 | threads="$2" 143 | shift 2 144 | ;; 145 | -o|--out) 146 | out="$2" 147 | shift 2 148 | ;; 149 | --kallisto) 150 | k=1 151 | shift 152 | ;; 153 | *) 154 | echo "ERROR: unknown parameter $1" 155 | usage_index 156 | exit 1 157 | ;; 158 | esac 159 | done 
160 | 161 | kallisto=${k-0} 162 | 163 | if [[ "$kallisto" -eq 0 ]]; then 164 | 165 | $scriptdir/run_starindex.sh $transcripts $out $threads 166 | 167 | elif [[ "$kallisto" -eq 1 ]]; then 168 | 169 | $scriptdir/run_kallistoindex.sh $transcripts $out 170 | 171 | fi 172 | } 173 | 174 | run_genotype() { 175 | 176 | if [[ "$#" -eq 0 ]]; then 177 | usage_genotype 178 | exit 1 179 | fi 180 | 181 | while [[ "$#" -gt 0 ]]; do 182 | case "$1" in 183 | -h|--help) 184 | usage_genotype 185 | exit 186 | ;; 187 | -i|--index) 188 | index="$2" 189 | shift 2 190 | ;; 191 | -t|--transcripts) 192 | transcripts="$2" 193 | shift 2 194 | ;; 195 | -1|--fq1) 196 | fq1="$2" 197 | shift 2 198 | ;; 199 | -2|--fq2) 200 | fq2="$2" 201 | shift 2 202 | ;; 203 | -p|--threads) 204 | threads="$2" 205 | shift 2 206 | ;; 207 | -o|--out) 208 | outprefix="$2" 209 | shift 2 210 | ;; 211 | --kallisto) 212 | k=1 213 | shift 214 | ;; 215 | *) 216 | echo "ERROR: unknown parameter $1" 217 | usage_genotype 218 | exit 1 219 | ;; 220 | esac 221 | done 222 | 223 | kallisto=${k-0} 224 | 225 | if [[ "$kallisto" -eq 0 ]]; then 226 | 227 | $scriptdir/run_genotyping.sh $index $transcripts $fq1 $fq2 $outprefix $threads 228 | 229 | elif [[ "$kallisto" -eq 1 ]]; then 230 | 231 | $scriptdir/run_genotypingkallisto.sh $index $fq1 $fq2 $outprefix $threads 232 | 233 | fi 234 | 235 | } 236 | 237 | run_quant() { 238 | if [[ "$#" -eq 0 ]]; then 239 | usage_quant 240 | exit 1 241 | fi 242 | 243 | while [[ "$#" -gt 0 ]]; do 244 | case "$1" in 245 | -h|--help) 246 | usage_quant 247 | exit 248 | ;; 249 | -t|--transcripts) 250 | transcripts="$2" 251 | shift 2 252 | ;; 253 | -g|--genotypes) 254 | genos="$2" 255 | shift 2 256 | ;; 257 | -1|--fq1) 258 | fq1="$2" 259 | shift 2 260 | ;; 261 | -2|--fq2) 262 | fq2="$2" 263 | shift 2 264 | ;; 265 | -p|--threads) 266 | threads="$2" 267 | shift 2 268 | ;; 269 | -o|--outprefix) 270 | outprefix="$2" 271 | shift 2 272 | ;; 273 | --kallisto) 274 | k=1 275 | shift 276 | ;; 277 | --salmonreads) 278 
| s=1 279 | shift 280 | ;; 281 | *) 282 | echo "ERROR: unknown parameter $1" 283 | usage_quant 284 | exit 1 285 | ;; 286 | esac 287 | done 288 | 289 | kallisto=${k-0} 290 | salmonreads=${s-0} 291 | 292 | if [[ "$kallisto" -eq 0 ]] && [[ "$salmonreads" -eq 0 ]]; then 293 | 294 | $scriptdir/run_quant.sh $transcripts $genos $fq1 $fq2 $outprefix $threads 295 | 296 | elif [[ "$kallisto" -eq 1 ]] && [[ "$salmonreads" -eq 0 ]]; then 297 | 298 | $scriptdir/run_quantkallisto.sh $transcripts $genos $fq1 $fq2 $outprefix $threads 299 | 300 | elif [[ "$kallisto" -eq 0 ]] && [[ "$salmonreads" -eq 1 ]]; then 301 | 302 | $scriptdir/run_quantSalmonReads.sh $transcripts $genos $fq1 $fq2 $outprefix $threads 303 | 304 | fi 305 | } 306 | 307 | run_bam2fq() { 308 | if [[ "$#" -eq 0 ]]; then 309 | usage_bam2fq 310 | exit 1 311 | fi 312 | 313 | while [[ "$#" -gt 0 ]]; do 314 | case "$1" in 315 | -h|--help) 316 | usage_bam2fq 317 | exit 318 | ;; 319 | -b|--bam) 320 | bam="$2" 321 | shift 2 322 | ;; 323 | -m|--mhc) 324 | if [[ -n "$2" ]] && [[ "$2" != -* ]]; then 325 | mhc="$2" 326 | shift 2 327 | else 328 | echo "ERROR: missing MHC coordinates." 
329 | exit 1 330 | fi 331 | ;; 332 | -o|--out) 333 | out="$2" 334 | shift 2 335 | ;; 336 | --sort) 337 | s=1 338 | shift 339 | ;; 340 | *) 341 | echo "ERROR: unknown parameter $1" 342 | usage_bam2fq 343 | exit 1 344 | ;; 345 | esac 346 | done 347 | 348 | sort=${s-0} 349 | 350 | if [[ -n "$mhc" ]]; then 351 | $scriptdir/run_bam2fq_mhc.sh $bam $mhc $out $sort 352 | else 353 | $scriptdir/run_bam2fq.sh $bam $out $sort 354 | fi 355 | 356 | } 357 | 358 | # main 359 | if [[ "$#" -eq 0 ]]; then 360 | usage 361 | exit 1 362 | fi 363 | 364 | while [[ $# -gt 0 ]] 365 | do 366 | case "$1" in 367 | -h|--help) 368 | usage 369 | exit 370 | ;; 371 | 372 | prepare-ref) 373 | shift 374 | run_prepref "$@" 375 | break 376 | ;; 377 | 378 | index) 379 | shift 380 | run_index "$@" 381 | break 382 | ;; 383 | 384 | genotype) 385 | shift 386 | run_genotype "$@" 387 | break 388 | ;; 389 | 390 | quant) 391 | shift 392 | run_quant "$@" 393 | break 394 | ;; 395 | bam2fq) 396 | shift 397 | run_bam2fq "$@" 398 | break 399 | ;; 400 | *) 401 | echo "ERROR: unknown parameter $1" 402 | usage 403 | exit 1 404 | ;; 405 | esac 406 | done 407 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | ### kallisto ################################################################## 2 | 3 | BSD 2-Clause License 4 | 5 | Copyright (c) 2017, Nicolas Bray, Harold Pimentel, Páll Melsted and Lior 6 | Pachter All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | * Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 
13 | 14 | * Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | 30 | ### Salmon ################################################################### 31 | 32 | GNU GENERAL PUBLIC LICENSE 33 | Version 3, 29 June 2007 34 | 35 | Copyright (C) 2007 Free Software Foundation, Inc. 36 | Everyone is permitted to copy and distribute verbatim copies 37 | of this license document, but changing it is not allowed. 38 | 39 | Preamble 40 | 41 | The GNU General Public License is a free, copyleft license for 42 | software and other kinds of works. 43 | 44 | The licenses for most software and other practical works are designed 45 | to take away your freedom to share and change the works. By contrast, 46 | the GNU General Public License is intended to guarantee your freedom to 47 | share and change all versions of a program--to make sure it remains free 48 | software for all its users. 
We, the Free Software Foundation, use the 49 | GNU General Public License for most of our software; it applies also to 50 | any other work released this way by its authors. You can apply it to 51 | your programs, too. 52 | 53 | When we speak of free software, we are referring to freedom, not 54 | price. Our General Public Licenses are designed to make sure that you 55 | have the freedom to distribute copies of free software (and charge for 56 | them if you wish), that you receive source code or can get it if you 57 | want it, that you can change the software or use pieces of it in new 58 | free programs, and that you know you can do these things. 59 | 60 | To protect your rights, we need to prevent others from denying you 61 | these rights or asking you to surrender the rights. Therefore, you have 62 | certain responsibilities if you distribute copies of the software, or if 63 | you modify it: responsibilities to respect the freedom of others. 64 | 65 | For example, if you distribute copies of such a program, whether 66 | gratis or for a fee, you must pass on to the recipients the same 67 | freedoms that you received. You must make sure that they, too, receive 68 | or can get the source code. And you must show them these terms so they 69 | know their rights. 70 | 71 | Developers that use the GNU GPL protect your rights with two steps: 72 | (1) assert copyright on the software, and (2) offer you this License 73 | giving you legal permission to copy, distribute and/or modify it. 74 | 75 | For the developers' and authors' protection, the GPL clearly explains 76 | that there is no warranty for this free software. For both users' and 77 | authors' sake, the GPL requires that modified versions be marked as 78 | changed, so that their problems will not be attributed erroneously to 79 | authors of previous versions. 
80 | 81 | Some devices are designed to deny users access to install or run 82 | modified versions of the software inside them, although the manufacturer 83 | can do so. This is fundamentally incompatible with the aim of 84 | protecting users' freedom to change the software. The systematic 85 | pattern of such abuse occurs in the area of products for individuals to 86 | use, which is precisely where it is most unacceptable. Therefore, we 87 | have designed this version of the GPL to prohibit the practice for those 88 | products. If such problems arise substantially in other domains, we 89 | stand ready to extend this provision to those domains in future versions 90 | of the GPL, as needed to protect the freedom of users. 91 | 92 | Finally, every program is threatened constantly by software patents. 93 | States should not allow patents to restrict development and use of 94 | software on general-purpose computers, but in those that do, we wish to 95 | avoid the special danger that patents applied to a free program could 96 | make it effectively proprietary. To prevent this, the GPL assures that 97 | patents cannot be used to render the program non-free. 98 | 99 | The precise terms and conditions for copying, distribution and 100 | modification follow. 101 | 102 | TERMS AND CONDITIONS 103 | 104 | 0. Definitions. 105 | 106 | "This License" refers to version 3 of the GNU General Public License. 107 | 108 | "Copyright" also means copyright-like laws that apply to other kinds of 109 | works, such as semiconductor masks. 110 | 111 | "The Program" refers to any copyrightable work licensed under this 112 | License. Each licensee is addressed as "you". "Licensees" and 113 | "recipients" may be individuals or organizations. 114 | 115 | To "modify" a work means to copy from or adapt all or part of the work 116 | in a fashion requiring copyright permission, other than the making of an 117 | exact copy. 
The resulting work is called a "modified version" of the 118 | earlier work or a work "based on" the earlier work. 119 | 120 | A "covered work" means either the unmodified Program or a work based 121 | on the Program. 122 | 123 | To "propagate" a work means to do anything with it that, without 124 | permission, would make you directly or secondarily liable for 125 | infringement under applicable copyright law, except executing it on a 126 | computer or modifying a private copy. Propagation includes copying, 127 | distribution (with or without modification), making available to the 128 | public, and in some countries other activities as well. 129 | 130 | To "convey" a work means any kind of propagation that enables other 131 | parties to make or receive copies. Mere interaction with a user through 132 | a computer network, with no transfer of a copy, is not conveying. 133 | 134 | An interactive user interface displays "Appropriate Legal Notices" 135 | to the extent that it includes a convenient and prominently visible 136 | feature that (1) displays an appropriate copyright notice, and (2) 137 | tells the user that there is no warranty for the work (except to the 138 | extent that warranties are provided), that licensees may convey the 139 | work under this License, and how to view a copy of this License. If 140 | the interface presents a list of user commands or options, such as a 141 | menu, a prominent item in the list meets this criterion. 142 | 143 | 1. Source Code. 144 | 145 | The "source code" for a work means the preferred form of the work 146 | for making modifications to it. "Object code" means any non-source 147 | form of a work. 148 | 149 | A "Standard Interface" means an interface that either is an official 150 | standard defined by a recognized standards body, or, in the case of 151 | interfaces specified for a particular programming language, one that 152 | is widely used among developers working in that language. 
153 | 154 | The "System Libraries" of an executable work include anything, other 155 | than the work as a whole, that (a) is included in the normal form of 156 | packaging a Major Component, but which is not part of that Major 157 | Component, and (b) serves only to enable use of the work with that 158 | Major Component, or to implement a Standard Interface for which an 159 | implementation is available to the public in source code form. A 160 | "Major Component", in this context, means a major essential component 161 | (kernel, window system, and so on) of the specific operating system 162 | (if any) on which the executable work runs, or a compiler used to 163 | produce the work, or an object code interpreter used to run it. 164 | 165 | The "Corresponding Source" for a work in object code form means all 166 | the source code needed to generate, install, and (for an executable 167 | work) run the object code and to modify the work, including scripts to 168 | control those activities. However, it does not include the work's 169 | System Libraries, or general-purpose tools or generally available free 170 | programs which are used unmodified in performing those activities but 171 | which are not part of the work. For example, Corresponding Source 172 | includes interface definition files associated with source files for 173 | the work, and the source code for shared libraries and dynamically 174 | linked subprograms that the work is specifically designed to require, 175 | such as by intimate data communication or control flow between those 176 | subprograms and other parts of the work. 177 | 178 | The Corresponding Source need not include anything that users 179 | can regenerate automatically from other parts of the Corresponding 180 | Source. 181 | 182 | The Corresponding Source for a work in source code form is that 183 | same work. 184 | 185 | 2. Basic Permissions. 
186 | 187 | All rights granted under this License are granted for the term of 188 | copyright on the Program, and are irrevocable provided the stated 189 | conditions are met. This License explicitly affirms your unlimited 190 | permission to run the unmodified Program. The output from running a 191 | covered work is covered by this License only if the output, given its 192 | content, constitutes a covered work. This License acknowledges your 193 | rights of fair use or other equivalent, as provided by copyright law. 194 | 195 | You may make, run and propagate covered works that you do not 196 | convey, without conditions so long as your license otherwise remains 197 | in force. You may convey covered works to others for the sole purpose 198 | of having them make modifications exclusively for you, or provide you 199 | with facilities for running those works, provided that you comply with 200 | the terms of this License in conveying all material for which you do 201 | not control copyright. Those thus making or running the covered works 202 | for you must do so exclusively on your behalf, under your direction 203 | and control, on terms that prohibit them from making any copies of 204 | your copyrighted material outside their relationship with you. 205 | 206 | Conveying under any other circumstances is permitted solely under 207 | the conditions stated below. Sublicensing is not allowed; section 10 208 | makes it unnecessary. 209 | 210 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 211 | 212 | No covered work shall be deemed part of an effective technological 213 | measure under any applicable law fulfilling obligations under article 214 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 215 | similar laws prohibiting or restricting circumvention of such 216 | measures. 
217 | 218 | When you convey a covered work, you waive any legal power to forbid 219 | circumvention of technological measures to the extent such circumvention 220 | is effected by exercising rights under this License with respect to 221 | the covered work, and you disclaim any intention to limit operation or 222 | modification of the work as a means of enforcing, against the work's 223 | users, your or third parties' legal rights to forbid circumvention of 224 | technological measures. 225 | 226 | 4. Conveying Verbatim Copies. 227 | 228 | You may convey verbatim copies of the Program's source code as you 229 | receive it, in any medium, provided that you conspicuously and 230 | appropriately publish on each copy an appropriate copyright notice; 231 | keep intact all notices stating that this License and any 232 | non-permissive terms added in accord with section 7 apply to the code; 233 | keep intact all notices of the absence of any warranty; and give all 234 | recipients a copy of this License along with the Program. 235 | 236 | You may charge any price or no price for each copy that you convey, 237 | and you may offer support or warranty protection for a fee. 238 | 239 | 5. Conveying Modified Source Versions. 240 | 241 | You may convey a work based on the Program, or the modifications to 242 | produce it from the Program, in the form of source code under the 243 | terms of section 4, provided that you also meet all of these conditions: 244 | 245 | a) The work must carry prominent notices stating that you modified 246 | it, and giving a relevant date. 247 | 248 | b) The work must carry prominent notices stating that it is 249 | released under this License and any conditions added under section 250 | 7. This requirement modifies the requirement in section 4 to 251 | "keep intact all notices". 252 | 253 | c) You must license the entire work, as a whole, under this 254 | License to anyone who comes into possession of a copy. 
This 255 | License will therefore apply, along with any applicable section 7 256 | additional terms, to the whole of the work, and all its parts, 257 | regardless of how they are packaged. This License gives no 258 | permission to license the work in any other way, but it does not 259 | invalidate such permission if you have separately received it. 260 | 261 | d) If the work has interactive user interfaces, each must display 262 | Appropriate Legal Notices; however, if the Program has interactive 263 | interfaces that do not display Appropriate Legal Notices, your 264 | work need not make them do so. 265 | 266 | A compilation of a covered work with other separate and independent 267 | works, which are not by their nature extensions of the covered work, 268 | and which are not combined with it such as to form a larger program, 269 | in or on a volume of a storage or distribution medium, is called an 270 | "aggregate" if the compilation and its resulting copyright are not 271 | used to limit the access or legal rights of the compilation's users 272 | beyond what the individual works permit. Inclusion of a covered work 273 | in an aggregate does not cause this License to apply to the other 274 | parts of the aggregate. 275 | 276 | 6. Conveying Non-Source Forms. 277 | 278 | You may convey a covered work in object code form under the terms 279 | of sections 4 and 5, provided that you also convey the 280 | machine-readable Corresponding Source under the terms of this License, 281 | in one of these ways: 282 | 283 | a) Convey the object code in, or embodied in, a physical product 284 | (including a physical distribution medium), accompanied by the 285 | Corresponding Source fixed on a durable physical medium 286 | customarily used for software interchange. 
287 | 288 | b) Convey the object code in, or embodied in, a physical product 289 | (including a physical distribution medium), accompanied by a 290 | written offer, valid for at least three years and valid for as 291 | long as you offer spare parts or customer support for that product 292 | model, to give anyone who possesses the object code either (1) a 293 | copy of the Corresponding Source for all the software in the 294 | product that is covered by this License, on a durable physical 295 | medium customarily used for software interchange, for a price no 296 | more than your reasonable cost of physically performing this 297 | conveying of source, or (2) access to copy the 298 | Corresponding Source from a network server at no charge. 299 | 300 | c) Convey individual copies of the object code with a copy of the 301 | written offer to provide the Corresponding Source. This 302 | alternative is allowed only occasionally and noncommercially, and 303 | only if you received the object code with such an offer, in accord 304 | with subsection 6b. 305 | 306 | d) Convey the object code by offering access from a designated 307 | place (gratis or for a charge), and offer equivalent access to the 308 | Corresponding Source in the same way through the same place at no 309 | further charge. You need not require recipients to copy the 310 | Corresponding Source along with the object code. If the place to 311 | copy the object code is a network server, the Corresponding Source 312 | may be on a different server (operated by you or a third party) 313 | that supports equivalent copying facilities, provided you maintain 314 | clear directions next to the object code saying where to find the 315 | Corresponding Source. Regardless of what server hosts the 316 | Corresponding Source, you remain obligated to ensure that it is 317 | available for as long as needed to satisfy these requirements. 
318 | 319 | e) Convey the object code using peer-to-peer transmission, provided 320 | you inform other peers where the object code and Corresponding 321 | Source of the work are being offered to the general public at no 322 | charge under subsection 6d. 323 | 324 | A separable portion of the object code, whose source code is excluded 325 | from the Corresponding Source as a System Library, need not be 326 | included in conveying the object code work. 327 | 328 | A "User Product" is either (1) a "consumer product", which means any 329 | tangible personal property which is normally used for personal, family, 330 | or household purposes, or (2) anything designed or sold for incorporation 331 | into a dwelling. In determining whether a product is a consumer product, 332 | doubtful cases shall be resolved in favor of coverage. For a particular 333 | product received by a particular user, "normally used" refers to a 334 | typical or common use of that class of product, regardless of the status 335 | of the particular user or of the way in which the particular user 336 | actually uses, or expects or is expected to use, the product. A product 337 | is a consumer product regardless of whether the product has substantial 338 | commercial, industrial or non-consumer uses, unless such uses represent 339 | the only significant mode of use of the product. 340 | 341 | "Installation Information" for a User Product means any methods, 342 | procedures, authorization keys, or other information required to install 343 | and execute modified versions of a covered work in that User Product from 344 | a modified version of its Corresponding Source. The information must 345 | suffice to ensure that the continued functioning of the modified object 346 | code is in no case prevented or interfered with solely because 347 | modification has been made. 
348 | 349 | If you convey an object code work under this section in, or with, or 350 | specifically for use in, a User Product, and the conveying occurs as 351 | part of a transaction in which the right of possession and use of the 352 | User Product is transferred to the recipient in perpetuity or for a 353 | fixed term (regardless of how the transaction is characterized), the 354 | Corresponding Source conveyed under this section must be accompanied 355 | by the Installation Information. But this requirement does not apply 356 | if neither you nor any third party retains the ability to install 357 | modified object code on the User Product (for example, the work has 358 | been installed in ROM). 359 | 360 | The requirement to provide Installation Information does not include a 361 | requirement to continue to provide support service, warranty, or updates 362 | for a work that has been modified or installed by the recipient, or for 363 | the User Product in which it has been modified or installed. Access to a 364 | network may be denied when the modification itself materially and 365 | adversely affects the operation of the network or violates the rules and 366 | protocols for communication across the network. 367 | 368 | Corresponding Source conveyed, and Installation Information provided, 369 | in accord with this section must be in a format that is publicly 370 | documented (and with an implementation available to the public in 371 | source code form), and must require no special password or key for 372 | unpacking, reading or copying. 373 | 374 | 7. Additional Terms. 375 | 376 | "Additional permissions" are terms that supplement the terms of this 377 | License by making exceptions from one or more of its conditions. 378 | Additional permissions that are applicable to the entire Program shall 379 | be treated as though they were included in this License, to the extent 380 | that they are valid under applicable law. 
If additional permissions 381 | apply only to part of the Program, that part may be used separately 382 | under those permissions, but the entire Program remains governed by 383 | this License without regard to the additional permissions. 384 | 385 | When you convey a copy of a covered work, you may at your option 386 | remove any additional permissions from that copy, or from any part of 387 | it. (Additional permissions may be written to require their own 388 | removal in certain cases when you modify the work.) You may place 389 | additional permissions on material, added by you to a covered work, 390 | for which you have or can give appropriate copyright permission. 391 | 392 | Notwithstanding any other provision of this License, for material you 393 | add to a covered work, you may (if authorized by the copyright holders of 394 | that material) supplement the terms of this License with terms: 395 | 396 | a) Disclaiming warranty or limiting liability differently from the 397 | terms of sections 15 and 16 of this License; or 398 | 399 | b) Requiring preservation of specified reasonable legal notices or 400 | author attributions in that material or in the Appropriate Legal 401 | Notices displayed by works containing it; or 402 | 403 | c) Prohibiting misrepresentation of the origin of that material, or 404 | requiring that modified versions of such material be marked in 405 | reasonable ways as different from the original version; or 406 | 407 | d) Limiting the use for publicity purposes of names of licensors or 408 | authors of the material; or 409 | 410 | e) Declining to grant rights under trademark law for use of some 411 | trade names, trademarks, or service marks; or 412 | 413 | f) Requiring indemnification of licensors and authors of that 414 | material by anyone who conveys the material (or modified versions of 415 | it) with contractual assumptions of liability to the recipient, for 416 | any liability that these contractual assumptions directly impose on 
417 | those licensors and authors. 418 | 419 | All other non-permissive additional terms are considered "further 420 | restrictions" within the meaning of section 10. If the Program as you 421 | received it, or any part of it, contains a notice stating that it is 422 | governed by this License along with a term that is a further 423 | restriction, you may remove that term. If a license document contains 424 | a further restriction but permits relicensing or conveying under this 425 | License, you may add to a covered work material governed by the terms 426 | of that license document, provided that the further restriction does 427 | not survive such relicensing or conveying. 428 | 429 | If you add terms to a covered work in accord with this section, you 430 | must place, in the relevant source files, a statement of the 431 | additional terms that apply to those files, or a notice indicating 432 | where to find the applicable terms. 433 | 434 | Additional terms, permissive or non-permissive, may be stated in the 435 | form of a separately written license, or stated as exceptions; 436 | the above requirements apply either way. 437 | 438 | 8. Termination. 439 | 440 | You may not propagate or modify a covered work except as expressly 441 | provided under this License. Any attempt otherwise to propagate or 442 | modify it is void, and will automatically terminate your rights under 443 | this License (including any patent licenses granted under the third 444 | paragraph of section 11). 445 | 446 | However, if you cease all violation of this License, then your 447 | license from a particular copyright holder is reinstated (a) 448 | provisionally, unless and until the copyright holder explicitly and 449 | finally terminates your license, and (b) permanently, if the copyright 450 | holder fails to notify you of the violation by some reasonable means 451 | prior to 60 days after the cessation. 
452 | 453 | Moreover, your license from a particular copyright holder is 454 | reinstated permanently if the copyright holder notifies you of the 455 | violation by some reasonable means, this is the first time you have 456 | received notice of violation of this License (for any work) from that 457 | copyright holder, and you cure the violation prior to 30 days after 458 | your receipt of the notice. 459 | 460 | Termination of your rights under this section does not terminate the 461 | licenses of parties who have received copies or rights from you under 462 | this License. If your rights have been terminated and not permanently 463 | reinstated, you do not qualify to receive new licenses for the same 464 | material under section 10. 465 | 466 | 9. Acceptance Not Required for Having Copies. 467 | 468 | You are not required to accept this License in order to receive or 469 | run a copy of the Program. Ancillary propagation of a covered work 470 | occurring solely as a consequence of using peer-to-peer transmission 471 | to receive a copy likewise does not require acceptance. However, 472 | nothing other than this License grants you permission to propagate or 473 | modify any covered work. These actions infringe copyright if you do 474 | not accept this License. Therefore, by modifying or propagating a 475 | covered work, you indicate your acceptance of this License to do so. 476 | 477 | 10. Automatic Licensing of Downstream Recipients. 478 | 479 | Each time you convey a covered work, the recipient automatically 480 | receives a license from the original licensors, to run, modify and 481 | propagate that work, subject to this License. You are not responsible 482 | for enforcing compliance by third parties with this License. 483 | 484 | An "entity transaction" is a transaction transferring control of an 485 | organization, or substantially all assets of one, or subdividing an 486 | organization, or merging organizations. 
If propagation of a covered 487 | work results from an entity transaction, each party to that 488 | transaction who receives a copy of the work also receives whatever 489 | licenses to the work the party's predecessor in interest had or could 490 | give under the previous paragraph, plus a right to possession of the 491 | Corresponding Source of the work from the predecessor in interest, if 492 | the predecessor has it or can get it with reasonable efforts. 493 | 494 | You may not impose any further restrictions on the exercise of the 495 | rights granted or affirmed under this License. For example, you may 496 | not impose a license fee, royalty, or other charge for exercise of 497 | rights granted under this License, and you may not initiate litigation 498 | (including a cross-claim or counterclaim in a lawsuit) alleging that 499 | any patent claim is infringed by making, using, selling, offering for 500 | sale, or importing the Program or any portion of it. 501 | 502 | 11. Patents. 503 | 504 | A "contributor" is a copyright holder who authorizes use under this 505 | License of the Program or a work on which the Program is based. The 506 | work thus licensed is called the contributor's "contributor version". 507 | 508 | A contributor's "essential patent claims" are all patent claims 509 | owned or controlled by the contributor, whether already acquired or 510 | hereafter acquired, that would be infringed by some manner, permitted 511 | by this License, of making, using, or selling its contributor version, 512 | but do not include claims that would be infringed only as a 513 | consequence of further modification of the contributor version. For 514 | purposes of this definition, "control" includes the right to grant 515 | patent sublicenses in a manner consistent with the requirements of 516 | this License. 
517 | 518 | Each contributor grants you a non-exclusive, worldwide, royalty-free 519 | patent license under the contributor's essential patent claims, to 520 | make, use, sell, offer for sale, import and otherwise run, modify and 521 | propagate the contents of its contributor version. 522 | 523 | In the following three paragraphs, a "patent license" is any express 524 | agreement or commitment, however denominated, not to enforce a patent 525 | (such as an express permission to practice a patent or covenant not to 526 | sue for patent infringement). To "grant" such a patent license to a 527 | party means to make such an agreement or commitment not to enforce a 528 | patent against the party. 529 | 530 | If you convey a covered work, knowingly relying on a patent license, 531 | and the Corresponding Source of the work is not available for anyone 532 | to copy, free of charge and under the terms of this License, through a 533 | publicly available network server or other readily accessible means, 534 | then you must either (1) cause the Corresponding Source to be so 535 | available, or (2) arrange to deprive yourself of the benefit of the 536 | patent license for this particular work, or (3) arrange, in a manner 537 | consistent with the requirements of this License, to extend the patent 538 | license to downstream recipients. "Knowingly relying" means you have 539 | actual knowledge that, but for the patent license, your conveying the 540 | covered work in a country, or your recipient's use of the covered work 541 | in a country, would infringe one or more identifiable patents in that 542 | country that you have reason to believe are valid. 
543 | 544 | If, pursuant to or in connection with a single transaction or 545 | arrangement, you convey, or propagate by procuring conveyance of, a 546 | covered work, and grant a patent license to some of the parties 547 | receiving the covered work authorizing them to use, propagate, modify 548 | or convey a specific copy of the covered work, then the patent license 549 | you grant is automatically extended to all recipients of the covered 550 | work and works based on it. 551 | 552 | A patent license is "discriminatory" if it does not include within 553 | the scope of its coverage, prohibits the exercise of, or is 554 | conditioned on the non-exercise of one or more of the rights that are 555 | specifically granted under this License. You may not convey a covered 556 | work if you are a party to an arrangement with a third party that is 557 | in the business of distributing software, under which you make payment 558 | to the third party based on the extent of your activity of conveying 559 | the work, and under which the third party grants, to any of the 560 | parties who would receive the covered work from you, a discriminatory 561 | patent license (a) in connection with copies of the covered work 562 | conveyed by you (or copies made from those copies), or (b) primarily 563 | for and in connection with specific products or compilations that 564 | contain the covered work, unless you entered into that arrangement, 565 | or that patent license was granted, prior to 28 March 2007. 566 | 567 | Nothing in this License shall be construed as excluding or limiting 568 | any implied license or other defenses to infringement that may 569 | otherwise be available to you under applicable patent law. 570 | 571 | 12. No Surrender of Others' Freedom. 572 | 573 | If conditions are imposed on you (whether by court order, agreement or 574 | otherwise) that contradict the conditions of this License, they do not 575 | excuse you from the conditions of this License. 
If you cannot convey a 576 | covered work so as to satisfy simultaneously your obligations under this 577 | License and any other pertinent obligations, then as a consequence you may 578 | not convey it at all. For example, if you agree to terms that obligate you 579 | to collect a royalty for further conveying from those to whom you convey 580 | the Program, the only way you could satisfy both those terms and this 581 | License would be to refrain entirely from conveying the Program. 582 | 583 | 13. Use with the GNU Affero General Public License. 584 | 585 | Notwithstanding any other provision of this License, you have 586 | permission to link or combine any covered work with a work licensed 587 | under version 3 of the GNU Affero General Public License into a single 588 | combined work, and to convey the resulting work. The terms of this 589 | License will continue to apply to the part which is the covered work, 590 | but the special requirements of the GNU Affero General Public License, 591 | section 13, concerning interaction through a network will apply to the 592 | combination as such. 593 | 594 | 14. Revised Versions of this License. 595 | 596 | The Free Software Foundation may publish revised and/or new versions of 597 | the GNU General Public License from time to time. Such new versions will 598 | be similar in spirit to the present version, but may differ in detail to 599 | address new problems or concerns. 600 | 601 | Each version is given a distinguishing version number. If the 602 | Program specifies that a certain numbered version of the GNU General 603 | Public License "or any later version" applies to it, you have the 604 | option of following the terms and conditions either of that numbered 605 | version or of any later version published by the Free Software 606 | Foundation. If the Program does not specify a version number of the 607 | GNU General Public License, you may choose any version ever published 608 | by the Free Software Foundation. 
609 | 610 | If the Program specifies that a proxy can decide which future 611 | versions of the GNU General Public License can be used, that proxy's 612 | public statement of acceptance of a version permanently authorizes you 613 | to choose that version for the Program. 614 | 615 | Later license versions may give you additional or different 616 | permissions. However, no additional obligations are imposed on any 617 | author or copyright holder as a result of your choosing to follow a 618 | later version. 619 | 620 | 15. Disclaimer of Warranty. 621 | 622 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 623 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 624 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 625 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 626 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 627 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 628 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 629 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 630 | 631 | 16. Limitation of Liability. 632 | 633 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 634 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 635 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 636 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 637 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 638 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 639 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 640 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 641 | SUCH DAMAGES. 642 | 643 | 17. Interpretation of Sections 15 and 16. 
 644 | 645 | If the disclaimer of warranty and limitation of liability provided 646 | above cannot be given local legal effect according to their terms, 647 | reviewing courts shall apply local law that most closely approximates 648 | an absolute waiver of all civil liability in connection with the 649 | Program, unless a warranty or assumption of liability accompanies a 650 | copy of the Program in return for a fee. 651 | 652 | END OF TERMS AND CONDITIONS 653 | 654 | How to Apply These Terms to Your New Programs 655 | 656 | If you develop a new program, and you want it to be of the greatest 657 | possible use to the public, the best way to achieve this is to make it 658 | free software which everyone can redistribute and change under these terms. 659 | 660 | To do so, attach the following notices to the program. It is safest 661 | to attach them to the start of each source file to most effectively 662 | state the exclusion of warranty; and each file should have at least 663 | the "copyright" line and a pointer to where the full notice is found. 664 | 665 | <one line to give the program's name and a brief idea of what it does.> 666 | Copyright (C) <year> <name of author> 667 | 668 | This program is free software: you can redistribute it and/or modify 669 | it under the terms of the GNU General Public License as published by 670 | the Free Software Foundation, either version 3 of the License, or 671 | (at your option) any later version. 672 | 673 | This program is distributed in the hope that it will be useful, 674 | but WITHOUT ANY WARRANTY; without even the implied warranty of 675 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 676 | GNU General Public License for more details. 677 | 678 | You should have received a copy of the GNU General Public License 679 | along with this program. If not, see <https://www.gnu.org/licenses/>. 680 | 681 | Also add information on how to contact you by electronic and paper mail.
 682 | 683 | If the program does terminal interaction, make it output a short 684 | notice like this when it starts in an interactive mode: 685 | 686 | <program> Copyright (C) <year> <name of author> 687 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 688 | This is free software, and you are welcome to redistribute it 689 | under certain conditions; type `show c' for details. 690 | 691 | The hypothetical commands `show w' and `show c' should show the appropriate 692 | parts of the General Public License. Of course, your program's commands 693 | might be different; for a GUI interface, you would use an "about box". 694 | 695 | You should also get your employer (if you work as a programmer) or school, 696 | if any, to sign a "copyright disclaimer" for the program, if necessary. 697 | For more information on this, and how to apply and follow the GNU GPL, see 698 | <https://www.gnu.org/licenses/>. 699 | 700 | The GNU General Public License does not permit incorporating your program 701 | into proprietary programs. If your program is a subroutine library, you 702 | may consider it more useful to permit linking proprietary applications with 703 | the library. If this is what you want to do, use the GNU Lesser General 704 | Public License instead of this License. But first, please read 705 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
706 | 707 | ### STAR ###################################################################### 708 | 709 | MIT License 710 | 711 | Copyright (c) 2019 Alexander Dobin 712 | 713 | Permission is hereby granted, free of charge, to any person obtaining a copy 714 | of this software and associated documentation files (the "Software"), to deal 715 | in the Software without restriction, including without limitation the rights 716 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 717 | copies of the Software, and to permit persons to whom the Software is 718 | furnished to do so, subject to the following conditions: 719 | 720 | The above copyright notice and this permission notice shall be included in all 721 | copies or substantial portions of the Software. 722 | 723 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 724 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 725 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 726 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 727 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 728 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 729 | SOFTWARE. 730 | --------------------------------------------------------------------------------