├── .gitignore
├── script
├── run_kallistoindex.sh
├── run_genotypingkallisto.sh
├── run_bam2fq.sh
├── write_pers_index.R
├── calc_starindex_params.R
├── run_starindex.sh
├── genotype_kallisto.R
├── write_winners.R
├── run_quantkallisto.sh
├── write_top5_fasta.R
├── run_quantSalmonReads.sh
├── write_final_genotypes.R
├── run_quant.sh
├── run_bam2fq_mhc.sh
├── run_genotyping.sh
└── make_index_files.R
├── README.Rmd
├── README.md
├── hlapers
└── license.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | *.tsv
2 | *.txt
3 | *.pbs
4 | index
5 | fastq
6 | gencode
7 | IMGTHLA
8 | results*
9 |
10 |
--------------------------------------------------------------------------------
/script/run_kallistoindex.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Build a kallisto index from a transcript FASTA.
4 | #
5 | # Usage: run_kallistoindex.sh <transcripts.fa> <out_index>
6 |
7 | transcripts=$1
8 | out=$2
9 |
10 | # Quoted so that paths containing spaces or glob characters reach kallisto
11 | # as single arguments.
12 | kallisto index -i "$out" "$transcripts"
13 |
--------------------------------------------------------------------------------
/script/run_genotypingkallisto.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Quantify reads with kallisto, then call HLA genotypes from the result.
4 | #
5 | # Usage: run_genotypingkallisto.sh <index> <fq1> <fq2> <outdir> <cpus>
6 | # Output: <outdir>/genotypes.tsv (written by genotype_kallisto.R)
7 |
8 | # Stop at the first failure so that genotyping never runs on a missing or
9 | # partial quantification.
10 | set -eo pipefail
11 |
12 | DIR=$( dirname "$0" )
13 | index=$1
14 | fq1=$2
15 | fq2=$3
16 | out=$4
17 | cpus=$5
18 |
19 | # All paths are quoted so that names with spaces stay single arguments.
20 | kallisto quant -i "$index" -t "$cpus" -o "$out" "$fq1" "$fq2"
21 |
22 | Rscript "$DIR/genotype_kallisto.R" "$out"
23 |
--------------------------------------------------------------------------------
/script/run_bam2fq.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Convert a paired-end BAM into two fastq files.
4 | #
5 | # Usage: run_bam2fq.sh <bam> <outPrefix>
6 | # Output: <outPrefix>_1.fq.gz and <outPrefix>_2.fq.gz
7 |
8 | # Report a failure of `samtools sort` through the exit status of the
9 | # pipeline instead of letting `samtools fastq` mask it.
10 | set -o pipefail
11 |
12 | bam=$1
13 | outPrefix=$2
14 |
15 | fq1="${outPrefix}_1.fq.gz"
16 | fq2="${outPrefix}_2.fq.gz"
17 |
18 | # NOTE(review): the name sort below reads the whole file, so this index
19 | # does not look required here - confirm before removing. A failure of this
20 | # step is not fatal: the conversion below still runs.
21 | if [[ ! -f "$bam".bai ]]; then
22 |     samtools index "$bam" "$bam.bai"
23 | fi
24 |
25 | # Mates must be adjacent, hence the sort by read name. -N appends /1 and /2
26 | # to the read names; reads sent to -0 are discarded.
27 | samtools sort -n "$bam" |\
28 |     samtools fastq -N -1 "$fq1" -2 "$fq2" -0 /dev/null -
29 |
30 |
--------------------------------------------------------------------------------
/script/write_pers_index.R:
--------------------------------------------------------------------------------
1 | library(hlaseqlib)
2 |
3 | # Write a FASTA holding the sequences of the HLA alleles called for a sample.
4 | #
5 | # Usage: Rscript write_pers_index.R <transcripts.fa> <genotypes.tsv> <outPrefix>
6 | # Output: <outPrefix>_index.fa
7 |
8 | opts <- commandArgs(TRUE)
9 | transcripts <- opts[1]
10 | typings <- opts[2]
11 | outPrefix <- opts[3]
12 |
13 | outindex <- paste0(outPrefix, "_index.fa")
14 |
15 | index <- Biostrings::readDNAStringSet(transcripts)
16 |
17 | typings_df <- readr::read_tsv(typings)
18 |
19 | # An entry may hold several allele names joined by "-". After splitting,
20 | # drop missing values and repeats so that no sequence is written twice.
21 | alleles <- unlist(strsplit(as.character(unique(typings_df$allele)), "-"))
22 | alleles <- unique(alleles[!is.na(alleles)])
23 |
24 | # Fail with a readable message instead of an opaque subscript error.
25 | missing_alleles <- setdiff(alleles, names(index))
26 |
27 | if (length(missing_alleles) > 0) {
28 |   stop("Alleles not found in ", transcripts, ": ",
29 |        paste(missing_alleles, collapse = ", "), call. = FALSE)
30 | }
31 |
32 | Biostrings::writeXStringSet(index[alleles], outindex)
33 |
--------------------------------------------------------------------------------
/script/calc_starindex_params.R:
--------------------------------------------------------------------------------
1 | suppressPackageStartupMessages(library(Biostrings))
2 |
3 | # Derive two STAR genomeGenerate settings from the size of a FASTA and store
4 | # them in <outprefix>/indexparams.txt, one value per line.
5 | #
6 | # Usage: Rscript calc_starindex_params.R <fasta> <outprefix>
7 |
8 | args <- commandArgs(TRUE)
9 | fasta_path <- args[1]
10 | out_dir <- args[2]
11 |
12 | params_file <- file.path(out_dir, "indexparams.txt")
13 |
14 | seq_widths <- width(readDNAStringSet(fasta_path))
15 |
16 | total_bases <- sum(seq_widths)
17 | n_seqs <- length(seq_widths)
18 |
19 | # Line 1: log2 of the mean sequence length, capped at 18 and rounded down.
20 | chr_bin_nbits <- floor(min(18, log2(total_bases / n_seqs)))
21 |
22 | # Line 2: log2(total length) / 2 - 1, capped at 14 and rounded down.
23 | sa_index_nbases <- floor(min(14, log2(total_bases) / 2 - 1))
24 |
25 | writeLines(as.character(c(chr_bin_nbits, sa_index_nbases)), params_file)
26 |
--------------------------------------------------------------------------------
/script/run_starindex.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Build a STAR index for a transcript FASTA, using the index parameters
4 | # computed by calc_starindex_params.R.
5 | #
6 | # Usage: run_starindex.sh <transcripts.fa> <outdir> <threads>
7 |
8 | # Stop at the first failure: without the parameter file STAR would be
9 | # launched with empty option values.
10 | set -eo pipefail
11 |
12 | DIR=$( dirname "$0" )
13 | transcripts=$1
14 | out=$2
15 | threads=$3
16 |
17 | mkdir -p "$out"
18 |
19 | Rscript "$DIR/calc_starindex_params.R" "$transcripts" "$out"
20 |
21 | # indexparams.txt: line 1 is genomeChrBinNbits, line 2 genomeSAindexNbases.
22 | binbits=$(awk 'FNR == 1' "$out/indexparams.txt")
23 | saindex=$(awk 'FNR == 2' "$out/indexparams.txt")
24 |
25 | STAR --runThreadN "$threads" --runMode genomeGenerate \
26 |     --genomeDir "$out" --genomeFastaFiles "$transcripts" \
27 |     --genomeChrBinNbits "$binbits" --genomeSAindexNbases "$saindex"
28 |
--------------------------------------------------------------------------------
/script/genotype_kallisto.R:
--------------------------------------------------------------------------------
1 | library(hlaseqlib)
2 | suppressPackageStartupMessages(library(dplyr))
3 | suppressPackageStartupMessages(library(readr))
4 |
5 | # Call HLA genotypes from the kallisto quantification stored in a directory.
6 | #
7 | # Usage: Rscript genotype_kallisto.R <quant_dir>
8 | # Input: <quant_dir>/abundance.tsv
9 | # Output: <quant_dir>/genotypes.tsv
10 |
11 | args <- commandArgs(TRUE)
12 | quant_dir <- args[1]
13 |
14 | abundances <- read_tsv(file.path(quant_dir, "abundance.tsv"))
15 |
16 | # Keep the IMGT targets and label each with its locus, i.e. the part of the
17 | # name between "IMGT_" and the first "*".
18 | hla_abundances <- abundances %>%
19 |   filter(grepl("^IMGT_", target_id)) %>%
20 |   mutate(locus = sub("^IMGT_([^\\*]+).+$", "\\1", target_id)) %>%
21 |   select(locus, allele = target_id, counts = est_counts, tpm)
22 |
23 | calls <- hla_genotype(hla_abundances, th = 0.15)
24 | calls <- filter(calls, !is.na(allele))
25 |
26 | write_tsv(calls, file.path(quant_dir, "genotypes.tsv"))
27 |
--------------------------------------------------------------------------------
/script/write_winners.R:
--------------------------------------------------------------------------------
1 | suppressPackageStartupMessages(library(dplyr))
2 | suppressPackageStartupMessages(library(readr))
3 |
4 | # List, one per line, the allele with the most reads at each locus.
5 | #
6 | # Usage: Rscript write_winners.R <quant.sf> <out.txt>
7 |
8 | args <- commandArgs(TRUE)
9 | quant_path <- args[1]
10 | winners_path <- args[2]
11 |
12 | # locus: text between "IMGT_" and the first "*"; lineage: up to the first ":".
13 | allele_quants <- read_tsv(quant_path) %>%
14 |   transmute(locus = sub("^IMGT_([^\\*]+).+$", "\\1", Name),
15 |             lineage = sub("^IMGT_([^:]+).*$", "\\1", Name),
16 |             allele = Name,
17 |             counts = NumReads,
18 |             tpm = TPM)
19 |
20 | best_per_locus <- allele_quants %>%
21 |   group_by(locus) %>%
22 |   slice(which.max(counts)) %>%
23 |   ungroup()
24 |
25 | winner_alleles <- unique(best_per_locus$allele)
26 |
27 | writeLines(winner_alleles, winners_path)
28 |
--------------------------------------------------------------------------------
/script/run_quantkallisto.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Quantify expression with kallisto against a per-sample transcriptome:
4 | # the non-HLA transcripts plus the HLA alleles called for the sample.
5 | #
6 | # Usage: run_quantkallisto.sh <hladb> <genotypes.tsv> <fq1> <fq2> <outPrefix> <cpus>
7 | # Output: <outPrefix>_quant/ and <outPrefix>_hlaquant.tsv
8 |
9 | # Each step consumes the output of the previous one, so stop at the first
10 | # failure instead of indexing or quantifying against a broken file.
11 | set -eo pipefail
12 |
13 | DIR=$( dirname "$0" )
14 | hladb=$1
15 | genos=$2
16 | fq1=$3
17 | fq2=$4
18 | outPrefix=$5
19 | cpus=$6
20 |
21 | transcripts="$hladb/transcripts_MHC_HLAsupp.fa"
22 | transcriptsNoHLA="$hladb/transcripts_noHLA.fa"
23 | samplehla="${outPrefix}_index.fa"
24 | sample_transcripts="${outPrefix}_transcripts.fa"
25 | index="${outPrefix}_index"
26 | out="${outPrefix}_quant"
27 |
28 | # Writes $samplehla with the sequences of the sample's alleles.
29 | Rscript "$DIR/write_pers_index.R" "$transcripts" "$genos" "$outPrefix"
30 |
31 | cat "$transcriptsNoHLA" "$samplehla" > "$sample_transcripts"
32 |
33 | kallisto index -i "$index" "$sample_transcripts"
34 |
35 | kallisto quant -i "$index" -t "$cpus" -o "$out" --bias "$fq1" "$fq2"
36 |
37 | # Keep the header plus the IMGT rows; columns 1, 4 and 5 of abundance.tsv.
38 | awk 'FNR == 1 || /IMGT/ {print $1"\t"$4"\t"$5}' "$out/abundance.tsv" \
39 |     > "${outPrefix}_hlaquant.tsv"
40 |
41 | rm "$samplehla" "$sample_transcripts" "$index"
42 |
--------------------------------------------------------------------------------
/script/write_top5_fasta.R:
--------------------------------------------------------------------------------
1 | suppressPackageStartupMessages(library(dplyr))
2 | suppressPackageStartupMessages(library(readr))
3 |
4 | # Write a FASTA with the best-supported HLA alleles of every locus.
5 | #
6 | # Usage: Rscript write_top5_fasta.R <quant.sf> <transcripts.fa> <out.fa>
7 |
8 | args <- commandArgs(TRUE)
9 | quant_path <- args[1]
10 | fasta_path <- args[2]
11 | out_path <- args[3]
12 |
13 | sequences <- Biostrings::readDNAStringSet(fasta_path)
14 |
15 | # lineage: name up to the first ":"; locus: lineage up to the first "*".
16 | hla_quants <- read_tsv(quant_path) %>%
17 |   filter(grepl("^IMGT", Name)) %>%
18 |   mutate(lineage = sub("^IMGT_([^:]+).*$", "\\1", Name),
19 |          locus = sub("^([^\\*]+).+$", "\\1", lineage)) %>%
20 |   select(locus, lineage, allele = Name, est_counts = NumReads, tpm = TPM)
21 |
22 | # Step 1: the five alleles with the highest counts at each locus.
23 | top5 <- hla_quants %>%
24 |   group_by(locus) %>%
25 |   top_n(5, est_counts) %>%
26 |   ungroup()
27 |
28 | # Step 2: within a lineage, drop alleles whose TPM is not above a quarter
29 | # of that lineage's best allele.
30 | kept <- top5 %>%
31 |   group_by(locus, lineage) %>%
32 |   filter(tpm / max(tpm) > 0.25) %>%
33 |   ungroup()
34 |
35 | top_alleles <- unique(kept$allele)
36 |
37 | Biostrings::writeXStringSet(sequences[top_alleles], out_path)
38 |
--------------------------------------------------------------------------------
/script/run_quantSalmonReads.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Quantify expression with Salmon, mapping the reads directly against a
4 | # per-sample transcriptome: the non-HLA transcripts plus the HLA alleles
5 | # called for the sample.
6 | #
7 | # Usage: run_quantSalmonReads.sh <hladb> <genotypes.tsv> <fq1> <fq2> <outPrefix> <cpus>
8 | # Output: <outPrefix>_quant/ and <outPrefix>_hlaquant.tsv
9 |
10 | # Each step consumes the output of the previous one, so stop at the first
11 | # failure instead of indexing or quantifying against a broken file.
12 | set -eo pipefail
13 |
14 | DIR=$( dirname "$0" )
15 | hladb=$1
16 | genos=$2
17 | fq1=$3
18 | fq2=$4
19 | outPrefix=$5
20 | cpus=$6
21 |
22 | out="${outPrefix}_quant"
23 | index="${outPrefix}_index"
24 | transcripts="$hladb/transcripts_MHC_HLAsupp.fa"
25 | transcriptsNoHLA="$hladb/transcripts_noHLA.fa"
26 | samplehla="${outPrefix}_index.fa"
27 | sample_transcripts="${outPrefix}_transcripts.fa"
28 |
29 | mkdir -p "${outPrefix}_log"
30 |
31 | # Writes $samplehla with the sequences of the sample's alleles.
32 | Rscript "$DIR/write_pers_index.R" "$transcripts" "$genos" "$outPrefix"
33 |
34 | cat "$transcriptsNoHLA" "$samplehla" > "$sample_transcripts"
35 |
36 | salmon index -t "$sample_transcripts" -i "$index"
37 |
38 | salmon quant -i "$index" -l A -1 "$fq1" -2 "$fq2" -o "$out" -p "$cpus" \
39 |     --seqBias --gcBias --posBias
40 |
41 | # Keep the header plus the IMGT rows; columns 1, 4 and 5 of quant.sf.
42 | awk 'FNR == 1 || /IMGT/ {print $1"\t"$4"\t"$5}' "$out/quant.sf" \
43 |     > "${outPrefix}_hlaquant.tsv"
44 |
45 | rm -r "$index" "$samplehla" "$sample_transcripts"
46 |
--------------------------------------------------------------------------------
/script/write_final_genotypes.R:
--------------------------------------------------------------------------------
1 | library(hlaseqlib)
2 | suppressPackageStartupMessages(library(dplyr))
3 | suppressPackageStartupMessages(library(readr))
4 |
5 | # Combine the first-round and, when present, second-round Salmon
6 | # quantifications into the final genotype table.
7 | #
8 | # Usage: Rscript write_final_genotypes.R <transcripts.fa> <quant_1st.sf>
9 | #            <quant_2nd.sf> <outPrefix>
10 | # Output: <outPrefix>_genotypes.tsv with columns locus and allele
11 |
12 | opts <- commandArgs(TRUE)
13 | # opts[1] (transcript FASTA) stays in the interface for existing callers.
14 | # Its sequences are not needed here, so the file is no longer loaded.
15 | transcripts <- opts[1]
16 | quants_1st <- opts[2]
17 | quants_2nd <- opts[3]
18 | outPrefix <- opts[4]
19 |
20 | outgenos <- paste0(outPrefix, "_genotypes.tsv")
21 |
22 | # Read a quant.sf file and keep, for each locus, the allele with most reads.
23 | read_top_alleles <- function(quant_file) {
24 |   quant_file %>%
25 |     read_tsv() %>%
26 |     mutate(locus = sub("^IMGT_([^\\*]+).+$", "\\1", Name)) %>%
27 |     select(locus, allele = Name, counts = NumReads, tpm = TPM) %>%
28 |     group_by(locus) %>%
29 |     slice(which.max(counts)) %>%
30 |     ungroup()
31 | }
32 |
33 | typings <- read_top_alleles(quants_1st)
34 |
35 | # The second quantification only exists when some reads were left after
36 | # removing those assigned to the first-round winners.
37 | if (file.exists(quants_2nd)) {
38 |   typings_2nd <- read_top_alleles(quants_2nd) %>%
39 |     filter(counts > 0)
40 |
41 |   typings <- bind_rows(typings, typings_2nd)
42 | }
43 |
44 | typings_df <- typings %>%
45 |   arrange(locus) %>%
46 |   hla_genotype(th = 0.05)
47 |
48 | typings_df %>%
49 |   filter(!is.na(allele)) %>%
50 |   select(locus, allele) %>%
51 |   write_tsv(outgenos)
52 |
--------------------------------------------------------------------------------
/script/run_quant.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DIR=$( dirname "$0" )
4 | hladb=$1
5 | genos=$2
6 | fq1=$3
7 | fq2=$4
8 | outPrefix=$5
9 | cpus=$6
10 |
11 |
12 | transcripts=$hladb/transcripts_MHC_HLAsupp.fa
13 |
14 | Rscript $DIR/write_pers_index.R $transcripts $genos $outPrefix
15 |
16 | transcriptsNoHLA=$hladb/transcripts_noHLA.fa
17 | samplehla=${outPrefix}_index.fa
18 | sample_transcripts=${outPrefix}_transcripts.fa
19 |
20 | cat $transcriptsNoHLA $samplehla > $sample_transcripts
21 |
22 | index=${outPrefix}_index
23 | bampers=${outPrefix}_Aligned.out.bam
24 | out=${outPrefix}_quant
25 |
26 | mkdir -p $index
27 | mkdir -p ${outPrefix}_log
28 |
29 | STAR --runThreadN $cpus --runMode genomeGenerate --genomeDir $index\
30 | --genomeFastaFiles $sample_transcripts\
31 | --genomeChrBinNbits 11 --genomeSAindexNbases 13\
32 | --outFileNamePrefix ${index}_
33 |
34 | STAR --runMode alignReads --runThreadN $cpus --genomeDir $index\
35 | --readFilesIn $fq1 $fq2 --readFilesCommand zcat\
36 | --outFilterMismatchNmax 999\
37 | --outFilterMismatchNoverReadLmax 0.04\
38 | --outFilterMultimapScoreRange 1\
39 | --outFilterMultimapNmax 150\
40 | --winAnchorMultimapNmax 300\
41 | --alignIntronMax 0\
42 | --alignEndsType Local\
43 | --outSAMprimaryFlag AllBestScore\
44 | --outSAMtype BAM Unsorted\
45 | --outFileNamePrefix ${outPrefix}_
46 |
47 | salmon quant -t $sample_transcripts -l A -a $bampers -o $out -p $cpus \
48 | --seqBias --gcBias --posBias
49 |
50 | awk 'FNR == 1 {print $1"\t"$4"\t"$5}' $out/quant.sf > ${outPrefix}_hlaquant.tsv
51 | awk '/IMGT/ {print $1"\t"$4"\t"$5}' $out/quant.sf >> ${outPrefix}_hlaquant.tsv
52 |
53 | mv ${outPrefix}_*Log.* ${outPrefix}_log/
54 |
55 | rm -r $bampers $index $sample_transcripts $samplehla ${outPrefix}_SJ*
56 |
--------------------------------------------------------------------------------
/script/run_bam2fq_mhc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Extract, as paired fastq files, the reads of a BAM that overlap the MHC
4 | # region together with the reads that are not flagged as properly paired.
5 | #
6 | # Usage: run_bam2fq_mhc.sh <bam> <mhc_coords.txt> <outPrefix> [dosorting]
7 | #   dosorting  1 to sort the BAM by coordinate first (default: 0)
8 | # Output: <outPrefix>_mhc_unmap_1.fq and <outPrefix>_mhc_unmap_2.fq
9 |
10 | # Each step consumes the output of the previous one, so stop at the first
11 | # failure.
12 | set -eo pipefail
13 |
14 | bam=$1
15 | mhccoords=$2
16 | outPrefix=$3
17 | dosorting=${4:-0}
18 |
19 | mhc=$(cat "$mhccoords")
20 |
21 | mapbam="${outPrefix}_map.bam"
22 | unmapbam="${outPrefix}_unmap.bam"
23 | tmpbam="${outPrefix}_tmp.bam"
24 | sortbam="${outPrefix}_tmpsorted.bam"
25 | tmpfq1="${outPrefix}_tmp_1.fq"
26 | tmpfq2="${outPrefix}_tmp_2.fq"
27 | reads1tmp="${outPrefix}_reads1.tmp"
28 | reads2tmp="${outPrefix}_reads2.tmp"
29 | reads="${outPrefix}_reads"
30 | reads1="${outPrefix}_reads1"
31 | reads2="${outPrefix}_reads2"
32 | finalfq1="${outPrefix}_mhc_unmap_1.fq"
33 | finalfq2="${outPrefix}_mhc_unmap_2.fq"
34 |
35 | if [[ "$dosorting" -eq 1 ]]; then
36 |     echo "Sorting BAM file..."
37 |     samtools sort -o "$sortbam" "$bam"
38 | else
39 |     sortbam=$bam
40 | fi
41 |
42 | echo "Extracting MHC and unmapped reads from BAM..."
43 |
44 | if [[ ! -f "$sortbam".bai ]]; then
45 |     samtools index "$sortbam" "${sortbam}.bai"
46 | fi
47 |
48 | # Options precede the input file so that they are also parsed where getopt
49 | # does not permute arguments. $mhc is deliberately unquoted: every
50 | # whitespace-separated entry of the coordinates file is passed as a region.
51 | samtools view -b -o "$mapbam" "$sortbam" $mhc
52 | # -F 0x2 drops the reads flagged as properly paired.
53 | samtools view -b -F 0x2 -o "$unmapbam" "$sortbam"
54 | # -f: overwrite a temporary file left behind by an interrupted run.
55 | samtools merge -f "$tmpbam" "$mapbam" "$unmapbam"
56 |
57 | # Remove the temporary sorted BAM and the index built for it, if present.
58 | rm -f "${outPrefix}_tmpsorted.bam" "${outPrefix}_tmpsorted.bam.bai"
59 |
60 | samtools sort -n "$tmpbam" |\
61 |     samtools fastq -1 "$tmpfq1" -2 "$tmpfq2" -0 /dev/null -
62 |
63 | # Read names are on every fourth line, starting at the first. Strip the
64 | # leading "@" and any /1 or /2 suffix so that mates compare equal. awk is
65 | # used because the `first~step` address of sed is a GNU extension.
66 | awk 'NR % 4 == 1 { sub(/^@/, ""); sub(/\/1$/, ""); print }' "$tmpfq1" |\
67 |     sort > "$reads1tmp"
68 | awk 'NR % 4 == 1 { sub(/^@/, ""); sub(/\/2$/, ""); print }' "$tmpfq2" |\
69 |     sort > "$reads2tmp"
70 |
71 | # Keep only the reads present in both files.
72 | comm -12 "$reads1tmp" "$reads2tmp" | sort -V | uniq > "$reads"
73 |
74 | # Give the names their suffix back when the fastq headers carry one.
75 | if [[ $(head -n1 "$tmpfq1") =~ /1$ ]] && [[ $(head -n1 "$tmpfq2") =~ /2$ ]]; then
76 |
77 |     awk '{ print $0 "/1" }' "$reads" > "$reads1"
78 |     awk '{ print $0 "/2" }' "$reads" > "$reads2"
79 |
80 | else
81 |
82 |     cp "$reads" "$reads1"
83 |     cp "$reads" "$reads2"
84 |
85 | fi
86 |
87 | echo "Writing fastq files..."
88 |
89 | seqtk subseq "$tmpfq1" "$reads1" > "$finalfq1"
90 | seqtk subseq "$tmpfq2" "$reads2" > "$finalfq2"
91 |
92 | rm "$mapbam" "$unmapbam" "$tmpbam" "$tmpfq1" "$tmpfq2" "$reads1tmp" \
93 |     "$reads2tmp" "$reads" "$reads1" "$reads2"
94 |
95 | echo "Done!"
96 |
--------------------------------------------------------------------------------
/script/run_genotyping.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Infer HLA genotypes from paired-end reads:
4 | #   1. align the reads to the HLA-supplemented index and quantify;
5 | #   2. requantify against the top alleles of each locus and pick a winner;
6 | #   3. requantify the reads that do not map to a winner, to detect a second
7 | #      allele;
8 | #   4. write the final genotypes.
9 | #
10 | # Usage: run_genotyping.sh <index> <transcripts.fa> <fq1> <fq2> <outPrefix> <cpus>
11 | # Output: <outPrefix>_genotypes.tsv and <outPrefix>_log/
12 | #
13 | # `set -e` is deliberately not used: the second quantification is optional
14 | # and write_final_genotypes.R copes with its output being absent.
15 |
16 | DIR=$( dirname "$0" )
17 | index=$1
18 | transcripts=$2
19 | fq1=$3
20 | fq2=$4
21 | outPrefix=$5
22 | cpus=$6
23 |
24 | bammhc="${outPrefix}_MHC_Aligned.out.bam"
25 | outmhc="${outPrefix}_MHC_quants"
26 | persindex="${outPrefix}_persindex"
27 | outtop5="${outPrefix}_top5_quants"
28 | readsWin="${outPrefix}_readsWin.txt"
29 | readsNoWin="${outPrefix}_readsNoWin.txt"
30 | fqnoWin1="${outPrefix}_noWin_1.fq"
31 | fqnoWin2="${outPrefix}_noWin_2.fq"
32 | outNoWin="${outPrefix}_NoWin_quants"
33 |
34 | # -L makes `file` inspect the target when the fastq is a symlink; "-" is
35 | # the STAR value for "no read command".
36 | if file -L "$fq1" | grep -q gzip ; then
37 |     readcommand=zcat
38 | else
39 |     readcommand="-"
40 | fi
41 |
42 | # Print the name field of the first record of a fastq, decompressing it
43 | # when the input was detected as gzipped.
44 | first_read_name() {
45 |     if [[ "$readcommand" == zcat ]]; then
46 |         zcat "$1"
47 |     else
48 |         cat "$1"
49 |     fi | awk 'NR == 1 { print $1; exit }'
50 | }
51 |
52 | # Remap to supplemented index
53 | echo "Remapping extracted reads to personalized MHC index..."
54 |
55 | STAR --runMode alignReads --runThreadN "$cpus" --genomeDir "$index" \
56 |     --readFilesIn "$fq1" "$fq2" --readFilesCommand "$readcommand" \
57 |     --outFilterMismatchNmax 1 \
58 |     --outFilterMultimapScoreRange 0 \
59 |     --outFilterMultimapNmax 3000 \
60 |     --winAnchorMultimapNmax 6000 \
61 |     --alignEndsType EndToEnd \
62 |     --outSAMprimaryFlag AllBestScore \
63 |     --outSAMtype BAM Unsorted \
64 |     --outFileNamePrefix "${outPrefix}_MHC_"
65 |
66 | # Quantify MHC expression
67 | echo "Genotyping HLA..."
68 |
69 | salmon quant -t "$transcripts" -l A -a "$bammhc" -o "$outmhc" -p "$cpus"
70 |
71 | # Extract up to top 5 HLA alleles
72 | mkdir -p "$persindex"
73 |
74 | Rscript "$DIR/write_top5_fasta.R" "$outmhc/quant.sf" "$transcripts" "$persindex/hla.fa"
75 |
76 | # Requantify expression of the top5
77 | mkdir -p "$outtop5"
78 |
79 | salmon index -t "$persindex/hla.fa" -i "$persindex/salmon"
80 |
81 | salmon quant -i "$persindex/salmon" -l A -1 "$fq1" -2 "$fq2" -o "$outtop5" \
82 |     -p "$cpus" --writeMappings="$outtop5/mappings.sam"
83 |
84 | Rscript "$DIR/write_winners.R" "$outtop5/quant.sf" "$outtop5/winners.txt"
85 |
86 | # Remove reads from the winner alleles.
87 | # The reference name (SAM column 3) is compared for exact equality: a
88 | # substring search would also select alleles whose name merely starts with
89 | # a winner's name. FILENAME == ARGV[1] identifies the lookup file even when
90 | # it is empty, which the FNR == NR idiom does not.
91 | samtools view "$outtop5/mappings.sam" |\
92 |     awk -F '\t' 'FILENAME == ARGV[1] { win[$0]; next }
93 |                  ($3 in win) { print $1 }' "$outtop5/winners.txt" - |\
94 |     sort |\
95 |     uniq > "$readsWin"
96 |
97 | samtools view "$outtop5/mappings.sam" |\
98 |     cut -f1 |\
99 |     awk 'FILENAME == ARGV[1] { hash[$0]; next }
100 |          !($0 in hash)' "$readsWin" - |\
101 |     sort |\
102 |     uniq > "$readsNoWin"
103 |
104 | # Give the names their /1 and /2 suffixes back when the fastq headers carry
105 | # them. The header is read through first_read_name so that gzipped input
106 | # is handled as well.
107 | if [[ $(first_read_name "$fq1") =~ /1$ ]] && [[ $(first_read_name "$fq2") =~ /2$ ]]; then
108 |
109 |     awk '{ print $0 "/1" }' "$readsNoWin" > "${readsNoWin}1"
110 |     awk '{ print $0 "/2" }' "$readsNoWin" > "${readsNoWin}2"
111 |
112 |     seqtk subseq "$fq1" "${readsNoWin}1" > "$fqnoWin1"
113 |     seqtk subseq "$fq2" "${readsNoWin}2" > "$fqnoWin2"
114 |
115 | else
116 |
117 |     seqtk subseq "$fq1" "$readsNoWin" > "$fqnoWin1"
118 |     seqtk subseq "$fq2" "$readsNoWin" > "$fqnoWin2"
119 |
120 | fi
121 |
122 | # Requantify to see if the winner alleles explain all the expression or if
123 | # there is another relevant allele
124 |
125 | if [[ -s "$fqnoWin1" ]] && [[ -s "$fqnoWin2" ]]; then
126 |     salmon quant -i "$persindex/salmon" -l A -1 "$fqnoWin1" -2 "$fqnoWin2" \
127 |         -o "$outNoWin" -p "$cpus"
128 | fi
129 |
130 | # Final genotypes
131 | Rscript "$DIR/write_final_genotypes.R" "$transcripts" "$outtop5/quant.sf" "$outNoWin/quant.sf" "$outPrefix"
132 |
133 | mkdir -p "${outPrefix}_log"
134 |
135 | mv "${outPrefix}"_MHC_Log* "${outPrefix}_log/"
136 | mv "${outPrefix}_MHC_quants/logs/salmon_quant.log" "${outPrefix}_log/"
137 |
138 | # -f: $outNoWin does not exist when the second quantification was skipped.
139 | rm -rf "${outPrefix}"_MHC* "$persindex" "$outtop5" "$readsWin" \
140 |     "${readsNoWin}"* "$fqnoWin1" "$fqnoWin2" "$outNoWin"
141 |
142 | echo "Done!"
143 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: ""
3 | output: github_document
4 | ---
5 |
6 | ```{r setup, include=FALSE}
7 | knitr::opts_chunk$set(echo = TRUE, comment = "", engine.opts = list(bash = "-l"))
8 | ```
9 |
10 | # HLApers
11 |
12 | ## License
13 |
14 | HLApers integrates software such as kallisto, Salmon and STAR. Before using it, please read the [license notices](https://github.com/genevol-usp/HLApers/blob/Latest/license.txt).
15 |
16 | ## Getting started
17 |
18 | ### Install required software
19 |
20 | ##### 1. HLApers
21 |
22 | ```
23 | git clone https://github.com/genevol-usp/HLApers.git
24 | ```
25 |
26 | ##### 2. R v3.4+
27 |
28 | ##### 3. In R, install the following packages
29 |
30 | - from Bioconductor:
31 |
32 | ```
33 | if (!requireNamespace("BiocManager", quietly = TRUE))
34 | install.packages("BiocManager")
35 |
36 | BiocManager::install("Biostrings")
37 | ```
38 |
39 | - from GitHub:
40 |
41 | ```
42 | if (!requireNamespace("devtools", quietly = TRUE))
43 | install.packages("devtools")
44 |
45 | devtools::install_github("genevol-usp/hlaseqlib")
46 | ```
47 |
48 | ##### 4. For the STAR-Salmon-based pipeline, install:
49 |
50 | - STAR v2.5.3a+
51 |
52 | - Salmon v0.8.2+
53 |
54 | - samtools 1.3+
55 |
56 | - seqtk
57 |
58 |
59 | ##### 5. For the kallisto-based pipeline, install:
60 |
61 | - kallisto
62 |
63 |
64 | ### Download data:
65 |
66 |
67 | ##### 1. IMGT database
68 |
69 | ```
70 | git clone https://github.com/ANHIG/IMGTHLA.git
71 | ```
72 |
73 | ##### 2. Gencode:
74 |
75 | - transcripts fasta (e.g., Gencode v37 fasta)
76 |
77 | - corresponding annotations GTF (e.g., Gencode v37 GTF)
78 |
79 |
80 |
81 | ## HLApers usage
82 |
83 | Link the hlapers executable in your execution path, or change to the HLApers directory and execute the program with `./hlapers`.
84 |
85 |
86 | ### Getting help
87 |
88 | HLApers is composed of the following modes:
89 |
90 | ```{bash}
91 | hlapers --help
92 | ```
93 |
94 |
95 | ### 1. Building a transcriptome supplemented with HLA sequences
96 |
97 | The first step is to use `hlapers prepare-ref` to build an index composed of
98 | Gencode transcripts, where we replace the HLA transcripts with IMGT HLA allele
99 | sequences.
100 |
101 | ```{bash}
102 | hlapers prepare-ref --help
103 | ```
104 |
105 | Example:
106 |
107 | ```
108 | hlapers prepare-ref -t gencode.v37.transcripts.fa.gz -a gencode.v37.annotation.gtf.gz -i IMGTHLA -o hladb
109 | ```
110 |
111 | ### 2. Creating an index for read alignment
112 |
113 | ```{bash}
114 | hlapers index --help
115 | ```
116 |
117 | Example:
118 |
119 | ```
120 | hlapers index -t hladb/transcripts_MHC_HLAsupp.fa -p 4 -o index
121 | ```
122 |
123 | ### 3. HLA genotyping
124 |
125 | Given a BAM file from a previous alignment to the genome, we first need to extract the reads mapped to the MHC region and those which are unmapped. For this, we can use the `bam2fq` utility.
126 |
127 | ```{bash}
128 | hlapers bam2fq --help
129 | ```
130 |
131 | Example:
132 |
133 | ```
134 | hlapers bam2fq -b HG00096.bam -m ./hladb/mhc_coords.txt -o HG00096
135 | ```
136 |
137 | Then we run the genotyping module.
138 |
139 | ```{bash}
140 | hlapers genotype --help
141 | ```
142 |
143 | Example:
144 |
145 | ```
146 | hlapers genotype -i index/STARMHC -t ./hladb/transcripts_MHC_HLAsupp.fa -1 HG00096_mhc_1.fq -2 HG00096_mhc_2.fq -p 8 -o results/HG00096
147 | ```
148 |
149 |
150 | ### 4. Quantify HLA expression
151 |
152 | In order to quantify expression, we use the `quant` module. If the original fastq files are available, we can proceed directly to the quantification step. If only a BAM file of a previous alignment to the genome is available, we first need to convert the BAM to fastq using the `bam2fq` utility.
153 |
154 | Example:
155 |
156 | ```
157 | hlapers bam2fq -b HG00096.bam -o HG00096
158 | ```
159 |
160 | Proceed to the quantification step.
161 |
162 |
163 | ```{bash}
164 | hlapers quant --help
165 | ```
166 |
167 | Example:
168 |
169 | ```
170 | hlapers quant -t ./hladb -g ./results/HG00096_genotypes.tsv -1 HG00096_1.fq.gz -2 HG00096_2.fq.gz -o ./results/HG00096 -p 8
171 | ```
172 |
173 |
--------------------------------------------------------------------------------
/script/make_index_files.R:
--------------------------------------------------------------------------------
1 | library(hlaseqlib)
2 | suppressPackageStartupMessages(library(Biostrings))
3 | suppressPackageStartupMessages(library(dplyr))
4 | suppressPackageStartupMessages(library(purrr))
5 | suppressPackageStartupMessages(library(readr))
6 | suppressPackageStartupMessages(library(tidyr))
7 |
8 | # Build the reference files used by the other hlapers modes.
9 | #
10 | # Usage: Rscript make_index_files.R <transcripts.fa> <annotation.gtf>
11 | #            <imgt_dir> <out_dir>
12 | # Output, all inside <out_dir>:
13 | #   transcripts_noHLA.fa        transcripts of genes absent from the HLA set
14 | #   transcripts_HLAsupp.fa      the above plus the IMGT allele sequences
15 | #   transcripts_MHC_HLAsupp.fa  MHC-region transcripts plus the IMGT alleles
16 | #   mhc_coords.txt              MHC region as chr:start-end
17 |
18 | # inputs
19 | opts <- commandArgs(TRUE)
20 | transcript_fasta <- opts[1]
21 | transcript_annot <- opts[2]
22 | imgt_db <- opts[3]
23 | out <- opts[4]
24 |
25 | # outputs
26 | out_noHLA <- file.path(out, "transcripts_noHLA.fa")
27 | out_supp <- file.path(out, "transcripts_HLAsupp.fa")
28 | out_MHCsupp <- file.path(out, "transcripts_MHC_HLAsupp.fa")
29 | out_coord <- file.path(out, "mhc_coords.txt")
30 |
31 | # recursive = TRUE so that a nested output path can be created in one go.
32 | if (!dir.exists(out)) dir.create(out, recursive = TRUE)
33 |
34 | # HLA database
35 | imgt_loci <- c("A", "B", "C", "E", "F", "G", "H",
36 |                "DMA", "DMB", "DOA", "DOB",
37 |                "DPA1", "DPA2", "DPB1", "DPB2",
38 |                "DQA1", "DQA2", "DQB1",
39 |                "DRA", "DRB1", "DRB3", "DRB4", "DRB5")
40 |
41 | # Loci named in the nucleotide alignment files of the IMGT directory.
42 | present_loci <-
43 |   list.files(file.path(imgt_db, "alignments"), full.names = TRUE) %>%
44 |   .[grep("nuc", .)] %>%
45 |   .[!grepl("(Class)|(HFE)|(TAP)|(MIC)", .)] %>%
46 |   map(. %>%
47 |         readLines() %>%
48 |         trimws() %>%
49 |         .[grep("^[A-Z1-9]+\\*\\d+:", .)] %>%
50 |         sub("^([^*]+).*$", "\\1", .) %>%
51 |         unique()) %>%
52 |   unlist()
53 |
54 | imgt_loci_inc <- imgt_loci[imgt_loci %in% present_loci]
55 |
56 | # A wrong IMGT path otherwise only surfaces later as an obscure error.
57 | if (length(imgt_loci_inc) == 0) {
58 |   stop("No HLA loci found under ", file.path(imgt_db, "alignments"),
59 |        "; check the path to the IMGT directory.", call. = FALSE)
60 | }
61 |
62 | message(paste("HLApers found data for the following loci, which will be personalized:",
63 |               paste(imgt_loci_inc, collapse = ", ")))
64 |
65 | # One sequence per allele, named "IMGT_<allele>"; alleles whose name ends
66 | # in "N" are left out.
67 | hladb <- tibble(locus = imgt_loci_inc) %>%
68 |   mutate(data = map(locus, ~hla_compile_index(., imgt_db))) %>%
69 |   filter(!is.na(data)) %>%
70 |   unnest(data) %>%
71 |   filter(!grepl("N$", allele)) %>%
72 |   select(-locus) %>%
73 |   mutate(allele = paste0("IMGT_", allele)) %>%
74 |   split(.$allele) %>%
75 |   map_chr("cds") %>%
76 |   DNAStringSet()
77 |
78 | hladb_genes <- unique(sub("^IMGT_([^\\*]+).+$", "HLA-\\1", names(hladb)))
79 |
80 | # Annotations
81 | message("Reading transcript annotations...")
82 | g_annot <- read_tsv(transcript_annot, comment = "#", col_names = FALSE,
83 |                     col_types = "ccciicccc", progress = FALSE)
84 |
85 | transcripts_db <- g_annot %>%
86 |   filter(X3 == "transcript") %>%
87 |   mutate(gene_name = sub("^.*gene_name \"([^\"]+)\";.*$", "\\1", X9),
88 |          gene_id = sub("^.*gene_id \"([^\"]+)\";.*$", "\\1", X9),
89 |          transcript_id = sub("^.*transcript_id \"([^\"]+)\";.*$", "\\1", X9)) %>%
90 |   select(chr = X1, start = X4, end = X5, gene_name, gene_id, transcript_id)
91 |
92 | # MHC region: span of the HLA genes on chromosome 6, padded by 500 kb on
93 | # each side.
94 | mhc_coords <- transcripts_db %>%
95 |   filter(chr == "chr6" | chr == 6, gene_name %in% hladb_genes) %>%
96 |   summarise(chr = unique(chr), start = min(start) -5e5, end = max(end) + 5e5)
97 |
98 | mhc_coords %>%
99 |   mutate(out = paste0(chr, ":", start, "-", end)) %>%
100 |   pull(out) %>%
101 |   writeLines(out_coord)
102 |
103 | # Transcript sequences; names are cut at the first "|".
104 | transcripts <- readDNAStringSet(transcript_fasta) %>%
105 |   `names<-`(sub("^([^\\|]+).*$", "\\1", names(.)))
106 |
107 | transcripts_no_hla <- transcripts_db %>%
108 |   filter(! gene_name %in% hladb_genes) %>%
109 |   pull(transcript_id) %>%
110 |   transcripts[.]
111 |
112 | transcripts_hlasupp <- c(transcripts_no_hla, hladb)
113 |
114 | mhc_transc_ids <- transcripts_db %>%
115 |   filter(chr == "chr6" | chr == 6, start >= mhc_coords$start, end <= mhc_coords$end,
116 |          transcript_id %in% names(transcripts_no_hla)) %>%
117 |   pull(transcript_id)
118 |
119 | transcripts_mhc <-
120 |   tibble(tx_id = names(transcripts_no_hla),
121 |          cds = as.character(transcripts_no_hla)) %>%
122 |   filter(tx_id %in% mhc_transc_ids) %>%
123 |   split(.$tx_id) %>%
124 |   map_chr("cds") %>%
125 |   DNAStringSet()
126 |
127 | transcripts_mhc_supp <- c(transcripts_mhc, hladb)
128 |
129 | message("writing index files...")
130 | writeXStringSet(transcripts_hlasupp, out_supp)
131 | writeXStringSet(transcripts_no_hla, out_noHLA)
132 | writeXStringSet(transcripts_mhc_supp, out_MHCsupp)
133 |
134 | message("Done!")
135 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # HLApers
3 |
4 | ## License
5 |
6 | HLApers integrates software such as kallisto, Salmon and STAR. Before
7 | using it, please read the
8 | [license notices](https://github.com/genevol-usp/HLApers/blob/Latest/license.txt).
9 |
10 | ## Getting started
11 |
12 | ### Install required software
13 |
14 | ##### 1\. HLApers
15 |
16 | git clone https://github.com/genevol-usp/HLApers.git
17 |
18 | ##### 2\. R v3.4+
19 |
20 | ##### 3\. In R, install the following packages
21 |
22 | - from Bioconductor:
23 |
24 |
25 |
26 | if (!requireNamespace("BiocManager", quietly = TRUE))
27 | install.packages("BiocManager")
28 |
29 | BiocManager::install("Biostrings")
30 |
31 | - from GitHub:
32 |
33 |
34 |
35 | if (!requireNamespace("devtools", quietly = TRUE))
36 | install.packages("devtools")
37 |
38 | devtools::install_github("genevol-usp/hlaseqlib")
39 |
40 | ##### 4\. For the STAR-Salmon-based pipeline, install:
41 |
42 | - STAR v2.5.3a+
43 |
44 | - Salmon v0.8.2+
45 |
46 | - samtools 1.3+
47 |
48 | - seqtk
49 |
50 | ##### 5\. For the kallisto-based pipeline, install:
51 |
52 | - kallisto
53 |
54 | ### Download data:
55 |
56 | ##### 1\. IMGT database
57 |
58 | git clone https://github.com/ANHIG/IMGTHLA.git
59 |
60 | ##### 2\. Gencode:
61 |
62 | - transcripts fasta (e.g., Gencode v37 fasta)
63 |
64 | - corresponding annotations GTF (e.g., Gencode v37 GTF)
65 |
66 | ## HLApers usage
67 |
68 | Link the hlapers executable in your execution path, or change to the
69 | HLApers directory and execute the program with `./hlapers`.
70 |
71 | ### Getting help
72 |
73 | HLApers is composed of the following modes:
74 |
75 | ``` bash
76 | hlapers --help
77 | ```
78 |
79 | Usage: hlapers [modes]
80 |
81 | prepare-ref Prepare transcript fasta files.
82 | index Create index for read alignment.
83 | bam2fq Convert BAM to fastq.
84 | genotype Infer HLA genotypes.
85 | quant Quantify HLA expression.
86 |
87 | ### 1\. Building a transcriptome supplemented with HLA sequences
88 |
89 | The first step is to use `hlapers prepare-ref` to build an index
90 | composed of Gencode transcripts, where we replace the HLA transcripts
91 | with IMGT HLA allele sequences.
92 |
93 | ``` bash
94 | hlapers prepare-ref --help
95 | ```
96 |
97 | Usage: hlapers prepare-ref [options]
98 |
99 | -t | --transcripts Fasta with Gencode transcript sequences.
100 | -a | --annotations GTF from Gencode for the same Genome version.
101 | -i | --imgt Path to IMGT directory.
102 | -o | --out Output directory.
103 |
104 | Example:
105 |
106 | hlapers prepare-ref -t gencode.v37.transcripts.fa.gz -a gencode.v37.annotation.gtf.gz -i IMGTHLA -o hladb
107 |
108 | ### 2\. Creating an index for read alignment
109 |
110 | ``` bash
111 | hlapers index --help
112 | ```
113 |
114 | Usage: hlapers index [options]
115 |
116 | -t | --transcripts Fasta with Gencode transcript sequences.
117 | -p | --threads Number of threads.
118 | -o | --out Output directory.
119 | --kallisto Create index for kallisto pipeline instead of STARsalmon.
120 |
121 | Example:
122 |
123 | hlapers index -t hladb/transcripts_MHC_HLAsupp.fa -p 4 -o index
124 |
125 | ### 3\. HLA genotyping
126 |
127 | Given a BAM file from a previous alignment to the genome, we first need
128 | to extract the reads mapped to the MHC region and those which are
129 | unmapped. For this, we can use the `bam2fq` utility.
130 |
131 | ``` bash
132 | hlapers bam2fq --help
133 | ```
134 |
135 | Usage: hlapers bam2fq [options]
136 |
137 | -m | --mhc-coords Genomic coordinates of the MHC region in chrN:start-end format if MHC fastq is desired.
138 | -b | --bam BAM file (if -m is specified, needs to be sorted by coordinate; otherwise use --sort).
139 | -o | --outprefix Output prefix name.
140 | --sort Sort input BAM file by coordinate (REQUIRED if -m is specified and BAM is not sorted by coordinate).
141 |
142 | Example:
143 |
144 | hlapers bam2fq -b HG00096.bam -m ./hladb/mhc_coords.txt -o HG00096
145 |
146 | Then we run the genotyping module.
147 |
148 | ``` bash
149 | hlapers genotype --help
150 | ```
151 |
152 | Usage: hlapers genotype [options]
153 |
154 | -i | --index Index generated by 'hlapers index'.
155 | -t | --transcripts Fasta with Gencode transcripts sequences used for 'hlapers index'.
156 | -1 | --fq1 Fastq for READ 1.
157 | -2 | --fq2 Fastq for READ 2.
158 | -p | --threads Number of threads.
159 | -o | --outprefix Output prefix name.
160 | --kallisto Use kallisto for genotyping.
161 |
162 | Example:
163 |
164 | hlapers genotype -i index/STARMHC -t ./hladb/transcripts_MHC_HLAsupp.fa -1 HG00096_mhc_1.fq -2 HG00096_mhc_2.fq -p 8 -o results/HG00096
165 |
166 | ### 4\. Quantify HLA expression
167 |
168 | In order to quantify expression, we use the `quant` module. If the
169 | original fastq files are available, we can proceed directly to the
170 | quantification step. If only a BAM file of a previous alignment to the
171 | genome is available, we first need to convert the BAM to fastq using the
172 | `bam2fq` utility.
173 |
174 | Example:
175 |
176 | hlapers bam2fq -b HG00096.bam -o HG00096
177 |
178 | Proceed to the quantification step.
179 |
180 | ``` bash
181 | hlapers quant --help
182 | ```
183 |
184 | Usage: hlapers quant [options]
185 |
186 | -t | --transcripts Reference transcripts directory.
187 | -g | --genotypes *_genotypes.tsv file generated by 'hlapers genotype'.
188 | -1 | --fq1 Fastq for READ 1.
189 | -2 | --fq2 Fastq for READ 2.
190 | -p | --threads Number of threads.
191 | -o | --out Output prefix name.
192 | --salmonreads Use Salmon lightweight alignment for quantification (NOT TESTED)
193 | --kallisto Use kallisto for quantification.
194 |
195 | Example:
196 |
197 | hlapers quant -t ./hladb -g ./results/HG00096_genotypes.tsv -1 HG00096_1.fq.gz -2 HG00096_2.fq.gz -o ./results/HG00096 -p 8
198 |
--------------------------------------------------------------------------------
/hlapers:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
# script directory
# Locate the directory that holds this executable so the helper scripts in
# ./script are found however hlapers was started (relative path, $PATH lookup
# or through a symlink).
myexecpath="$( readlink -f "$0" 2>/dev/null )"

if [[ -z "$myexecpath" ]]; then
    # 'readlink -f' is not available everywhere (BSD readlink historically
    # lacks it). Fall back to $0 itself and follow one level of symlink by
    # hand; a plain 'readlink' prints nothing for a regular file, which is why
    # its output cannot be used unconditionally.
    myexecpath="$0"
    if [[ -L "$myexecpath" ]]; then
        linktarget="$( readlink "$myexecpath" )"
        if [[ "$linktarget" == /* ]]; then
            myexecpath="$linktarget"
        else
            # relative link targets are relative to the link's own directory
            myexecpath="$( dirname "$myexecpath" )/$linktarget"
        fi
    fi
fi

# cd + pwd turns a possibly relative directory into an absolute one
execdir="$( cd "$( dirname "$myexecpath" )" && pwd )"
scriptdir=$execdir/script
18 |
19 | # Usage functions
# Print the top-level help: a header, a blank line, then one row per mode
# (mode name left-aligned in a 20-character column, followed by its summary).
usage() {
    printf '%s\n\n' "Usage: hlapers [modes]"
    # printf re-applies the format to each successive (name, summary) pair
    printf "%-20s %s\n" \
        "prepare-ref" "Prepare transcript fasta files." \
        "index" "Create index for read alignment." \
        "bam2fq" "Convert BAM to fastq." \
        "genotype" "Infer HLA genotypes." \
        "quant" "Quantify HLA expression."
}
29 |
# Print the help text of the 'prepare-ref' mode: a header, a blank line, then
# one row per option (option left-aligned in a 20-character column).
usage_prepare() {
    printf '%s\n\n' "Usage: hlapers prepare-ref [options]"
    # printf re-applies the format to each successive (option, help) pair
    printf "%-20s %s\n" \
        "-t | --transcripts" "Fasta with Gencode transcript sequences." \
        "-a | --annotations" "GTF from Gencode for the same Genome version." \
        "-i | --imgt" "Path to IMGT directory." \
        "-o | --out" "Output directory."
}
38 |
# Print the help text of the 'index' mode: a header, a blank line, then one
# row per option (option left-aligned in a 20-character column).
usage_index() {
    printf '%s\n\n' "Usage: hlapers index [options]"
    # printf re-applies the format to each successive (option, help) pair
    printf "%-20s %s\n" \
        "-t | --transcripts" "Fasta with Gencode transcript sequences." \
        "-p | --threads" "Number of threads." \
        "-o | --out" "Output directory." \
        "--kallisto" "Create index for kallisto pipeline instead of STARsalmon."
}
47 |
# Print the help text of the 'genotype' mode: a header, a blank line, then one
# row per option (option left-aligned in a 20-character column).
usage_genotype() {
    printf '%s\n\n' "Usage: hlapers genotype [options]"
    # printf re-applies the format to each successive (option, help) pair
    printf "%-20s %s\n" \
        "-i | --index" "Index generated by 'hlapers index'." \
        "-t | --transcripts" "Fasta with Gencode transcripts sequences used for 'hlapers index'." \
        "-1 | --fq1" "Fastq for READ 1." \
        "-2 | --fq2" "Fastq for READ 2." \
        "-p | --threads" "Number of threads." \
        "-o | --outprefix" "Output prefix name." \
        "--kallisto" "Use kallisto for genotyping."
}
59 |
# Print the help text of the 'quant' mode: a header, a blank line, then one
# row per option (option left-aligned in a 20-character column).
usage_quant() {
    printf '%s\n\n' "Usage: hlapers quant [options]"
    # printf re-applies the format to each successive (option, help) pair
    printf "%-20s %s\n" \
        "-t | --transcripts" "Reference transcripts directory." \
        "-g | --genotypes" "*_genotypes.tsv file generated by 'hlapers genotype'." \
        "-1 | --fq1" "Fastq for READ 1." \
        "-2 | --fq2" "Fastq for READ 2." \
        "-p | --threads" "Number of threads." \
        "-o | --out" "Output prefix name." \
        "--salmonreads" "Use Salmon lightweight alignment for quantification (NOT TESTED)" \
        "--kallisto" "Use kallisto for quantification."
}
72 |
# Print the help text of the 'bam2fq' mode: a header, a blank line, then one
# row per option (option left-aligned in a 20-character column).
usage_bam2fq() {
    printf '%s\n\n' "Usage: hlapers bam2fq [options]"
    # printf re-applies the format to each successive (option, help) pair
    printf "%-20s %s\n" \
        "-m | --mhc-coords" "Genomic coordinates of the MHC region in chrN:start-end format if MHC fastq is desired." \
        "-b | --bam" "BAM file (if -m is specified, needs to be sorted by coordinate; otherwise use --sort)." \
        "-o | --outprefix" "Output prefix name." \
        "--sort" "Sort input BAM file by coordinate (REQUIRED if -m is specified and BAM is not sorted by coordinate)."
}
81 |
82 | # Mode functions
# Mode 'prepare-ref': parse the command-line options, then run
# script/make_index_files.R with (transcripts, annotations, imgt, out) as
# positional arguments.
#
# Arguments: the options that followed 'prepare-ref' on the command line.
# Exits 1 (after printing the usage) when called without arguments, with an
# unknown option, or with an option whose value is missing.
run_prepref () {

    if [[ "$#" -eq 0 ]]; then
        usage_prepare
        exit 1
    fi

    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            -h|--help)
                usage_prepare
                exit
                ;;
            -t|--transcripts|-a|--annotations|-i|--imgt|-o|--out|-out)
                # All of these take a value. 'shift 2' fails and consumes
                # nothing when only the option itself is left, which would
                # keep this loop spinning forever, so check first.
                if [[ "$#" -lt 2 ]]; then
                    echo "ERROR: missing value for $1"
                    usage_prepare
                    exit 1
                fi
                case "$1" in
                    -t|--transcripts) transcripts="$2" ;;
                    -a|--annotations) annotations="$2" ;;
                    -i|--imgt)        imgt="$2" ;;
                    # '--out' is the spelling shown by usage_prepare; '-out'
                    # is what the parser used to accept and is kept so that
                    # existing command lines keep working.
                    -o|--out|-out)    out="$2" ;;
                esac
                shift 2
                ;;
            *)
                echo "ERROR: unknown parameter $1"
                usage_prepare
                exit 1
                ;;
        esac
    done

    # quoted so that paths containing spaces reach the R script intact
    Rscript "$scriptdir/make_index_files.R" "$transcripts" "$annotations" "$imgt" "$out"
}
122 |
123 |
# Mode 'index': parse the command-line options, then build the index with
# script/run_starindex.sh (default) or script/run_kallistoindex.sh
# (--kallisto).
#
# Arguments: the options that followed 'index' on the command line.
# Exits 1 (after printing the usage) when called without arguments, with an
# unknown option, or with an option whose value is missing.
run_index() {

    if [[ "$#" -eq 0 ]]; then
        usage_index
        exit 1
    fi

    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            -h|--help)
                usage_index
                exit
                ;;
            -t|--transcripts|-p|--threads|-o|--out)
                # All of these take a value. 'shift 2' fails and consumes
                # nothing when only the option itself is left, which would
                # keep this loop spinning forever, so check first.
                if [[ "$#" -lt 2 ]]; then
                    echo "ERROR: missing value for $1"
                    usage_index
                    exit 1
                fi
                case "$1" in
                    -t|--transcripts) transcripts="$2" ;;
                    -p|--threads)     threads="$2" ;;
                    -o|--out)         out="$2" ;;
                esac
                shift 2
                ;;
            --kallisto)
                k=1
                shift
                ;;
            *)
                echo "ERROR: unknown parameter $1"
                usage_index
                exit 1
                ;;
        esac
    done

    # 0 unless --kallisto was given
    kallisto=${k-0}

    if [[ "$kallisto" -eq 0 ]]; then

        # Paths are quoted so that spaces survive. ${threads:+...} expands to
        # nothing when -p was not given, so the helper sees the same number of
        # arguments as it did before the quoting was added.
        "$scriptdir/run_starindex.sh" "$transcripts" "$out" ${threads:+"$threads"}

    elif [[ "$kallisto" -eq 1 ]]; then

        "$scriptdir/run_kallistoindex.sh" "$transcripts" "$out"

    fi
}
173 |
# Mode 'genotype': parse the command-line options, then run
# script/run_genotyping.sh (default) or script/run_genotypingkallisto.sh
# (--kallisto).
#
# Arguments: the options that followed 'genotype' on the command line.
# Exits 1 (after printing the usage) when called without arguments, with an
# unknown option, or with an option whose value is missing.
run_genotype() {

    if [[ "$#" -eq 0 ]]; then
        usage_genotype
        exit 1
    fi

    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            -h|--help)
                usage_genotype
                exit
                ;;
            -i|--index|-t|--transcripts|-1|--fq1|-2|--fq2|-p|--threads|-o|--outprefix|--out)
                # All of these take a value. 'shift 2' fails and consumes
                # nothing when only the option itself is left, which would
                # keep this loop spinning forever, so check first.
                if [[ "$#" -lt 2 ]]; then
                    echo "ERROR: missing value for $1"
                    usage_genotype
                    exit 1
                fi
                case "$1" in
                    -i|--index)       index="$2" ;;
                    -t|--transcripts) transcripts="$2" ;;
                    -1|--fq1)         fq1="$2" ;;
                    -2|--fq2)         fq2="$2" ;;
                    -p|--threads)     threads="$2" ;;
                    # '--outprefix' is the spelling shown by usage_genotype;
                    # '--out' is what the parser used to accept and is kept
                    # so that existing command lines keep working.
                    -o|--outprefix|--out) outprefix="$2" ;;
                esac
                shift 2
                ;;
            --kallisto)
                k=1
                shift
                ;;
            *)
                echo "ERROR: unknown parameter $1"
                usage_genotype
                exit 1
                ;;
        esac
    done

    # 0 unless --kallisto was given
    kallisto=${k-0}

    # Paths are quoted so that spaces survive. ${threads:+...} expands to
    # nothing when -p was not given, so the helpers see the same number of
    # arguments as they did before the quoting was added.
    if [[ "$kallisto" -eq 0 ]]; then

        "$scriptdir/run_genotyping.sh" "$index" "$transcripts" "$fq1" "$fq2" "$outprefix" ${threads:+"$threads"}

    elif [[ "$kallisto" -eq 1 ]]; then

        "$scriptdir/run_genotypingkallisto.sh" "$index" "$fq1" "$fq2" "$outprefix" ${threads:+"$threads"}

    fi

}
236 |
# Mode 'quant': parse the command-line options, then run one of
# script/run_quant.sh (default), script/run_quantkallisto.sh (--kallisto) or
# script/run_quantSalmonReads.sh (--salmonreads).
#
# Arguments: the options that followed 'quant' on the command line.
# Exits 1 when called without arguments, with an unknown option, with an
# option whose value is missing, or with both --kallisto and --salmonreads.
run_quant() {
    if [[ "$#" -eq 0 ]]; then
        usage_quant
        exit 1
    fi

    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            -h|--help)
                usage_quant
                exit
                ;;
            -t|--transcripts|-g|--genotypes|-1|--fq1|-2|--fq2|-p|--threads|-o|--out|--outprefix)
                # All of these take a value. 'shift 2' fails and consumes
                # nothing when only the option itself is left, which would
                # keep this loop spinning forever, so check first.
                if [[ "$#" -lt 2 ]]; then
                    echo "ERROR: missing value for $1"
                    usage_quant
                    exit 1
                fi
                case "$1" in
                    -t|--transcripts) transcripts="$2" ;;
                    -g|--genotypes)   genos="$2" ;;
                    -1|--fq1)         fq1="$2" ;;
                    -2|--fq2)         fq2="$2" ;;
                    -p|--threads)     threads="$2" ;;
                    # '--out' is the spelling shown by usage_quant;
                    # '--outprefix' is what the parser used to accept and is
                    # kept so that existing command lines keep working.
                    -o|--out|--outprefix) outprefix="$2" ;;
                esac
                shift 2
                ;;
            --kallisto)
                k=1
                shift
                ;;
            --salmonreads)
                s=1
                shift
                ;;
            *)
                echo "ERROR: unknown parameter $1"
                usage_quant
                exit 1
                ;;
        esac
    done

    # each flag is 0 unless the corresponding switch was given
    kallisto=${k-0}
    salmonreads=${s-0}

    # Paths are quoted so that spaces survive. ${threads:+...} expands to
    # nothing when -p was not given, so the helpers see the same number of
    # arguments as they did before the quoting was added.
    if [[ "$kallisto" -eq 1 ]] && [[ "$salmonreads" -eq 1 ]]; then

        # previously this combination matched no branch and silently did nothing
        echo "ERROR: --kallisto and --salmonreads cannot be used together"
        usage_quant
        exit 1

    elif [[ "$kallisto" -eq 1 ]]; then

        "$scriptdir/run_quantkallisto.sh" "$transcripts" "$genos" "$fq1" "$fq2" "$outprefix" ${threads:+"$threads"}

    elif [[ "$salmonreads" -eq 1 ]]; then

        "$scriptdir/run_quantSalmonReads.sh" "$transcripts" "$genos" "$fq1" "$fq2" "$outprefix" ${threads:+"$threads"}

    else

        "$scriptdir/run_quant.sh" "$transcripts" "$genos" "$fq1" "$fq2" "$outprefix" ${threads:+"$threads"}

    fi
}
306 |
# Mode 'bam2fq': parse the command-line options, then run
# script/run_bam2fq_mhc.sh when MHC coordinates were given (-m) and
# script/run_bam2fq.sh otherwise. The sort flag (1 with --sort, else 0) is
# passed to the helper as its last argument.
#
# Arguments: the options that followed 'bam2fq' on the command line.
# Exits 1 when called without arguments, with an unknown option, or with an
# option whose value is missing.
run_bam2fq() {
    if [[ "$#" -eq 0 ]]; then
        usage_bam2fq
        exit 1
    fi

    while [[ "$#" -gt 0 ]]; do
        case "$1" in
            -h|--help)
                usage_bam2fq
                exit
                ;;
            -b|--bam|-o|--outprefix|--out)
                # Both take a value. 'shift 2' fails and consumes nothing when
                # only the option itself is left, which would keep this loop
                # spinning forever, so check first.
                if [[ "$#" -lt 2 ]]; then
                    echo "ERROR: missing value for $1"
                    usage_bam2fq
                    exit 1
                fi
                case "$1" in
                    -b|--bam) bam="$2" ;;
                    # '--outprefix' is the spelling shown by usage_bam2fq;
                    # '--out' is what the parser used to accept and is kept
                    # so that existing command lines keep working.
                    -o|--outprefix|--out) out="$2" ;;
                esac
                shift 2
                ;;
            # '--mhc-coords' is the documented spelling, '--mhc' the legacy one
            -m|--mhc-coords|--mhc)
                # the value must be present and must not look like another option
                if [[ -n "$2" ]] && [[ "$2" != -* ]]; then
                    mhc="$2"
                    shift 2
                else
                    echo "ERROR: missing MHC coordinates."
                    exit 1
                fi
                ;;
            --sort)
                s=1
                shift
                ;;
            *)
                echo "ERROR: unknown parameter $1"
                usage_bam2fq
                exit 1
                ;;
        esac
    done

    # 0 unless --sort was given
    sort=${s-0}

    # quoted so that paths containing spaces reach the helpers intact
    if [[ -n "$mhc" ]]; then
        "$scriptdir/run_bam2fq_mhc.sh" "$bam" "$mhc" "$out" "$sort"
    else
        "$scriptdir/run_bam2fq.sh" "$bam" "$out" "$sort"
    fi

}
357 |
# main
# The first argument selects the mode; everything after it is handed to the
# matching run_* function. With no arguments, or an unknown mode, the
# top-level usage is printed and the exit status is 1.
if [[ "$#" -eq 0 ]]; then
    usage
    exit 1
fi

mode="$1"
shift

# A plain 'case' replaces the former 'while ... break' wrapper: that loop ran
# at most once, and because 'break' returns 0 the script reported success even
# when the selected mode had failed.
case "$mode" in
    -h|--help)
        usage
        exit
        ;;
    prepare-ref)
        run_prepref "$@"
        ;;
    index)
        run_index "$@"
        ;;
    genotype)
        run_genotype "$@"
        ;;
    quant)
        run_quant "$@"
        ;;
    bam2fq)
        run_bam2fq "$@"
        ;;
    *)
        echo "ERROR: unknown parameter $mode"
        usage
        exit 1
        ;;
esac

# propagate the status of the mode that was run
exit "$?"
407 |
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
1 | ### kallisto ##################################################################
2 |
3 | BSD 2-Clause License
4 |
5 | Copyright (c) 2017, Nicolas Bray, Harold Pimentel, Páll Melsted and Lior
6 | Pachter All rights reserved.
7 |
8 | Redistribution and use in source and binary forms, with or without
9 | modification, are permitted provided that the following conditions are met:
10 |
11 | * Redistributions of source code must retain the above copyright notice, this
12 | list of conditions and the following disclaimer.
13 |
14 | * Redistributions in binary form must reproduce the above copyright notice,
15 | this list of conditions and the following disclaimer in the documentation
16 | and/or other materials provided with the distribution.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 |
30 | ### Salmon ###################################################################
31 |
32 | GNU GENERAL PUBLIC LICENSE
33 | Version 3, 29 June 2007
34 |
35 | Copyright (C) 2007 Free Software Foundation, Inc.
36 | Everyone is permitted to copy and distribute verbatim copies
37 | of this license document, but changing it is not allowed.
38 |
39 | Preamble
40 |
41 | The GNU General Public License is a free, copyleft license for
42 | software and other kinds of works.
43 |
44 | The licenses for most software and other practical works are designed
45 | to take away your freedom to share and change the works. By contrast,
46 | the GNU General Public License is intended to guarantee your freedom to
47 | share and change all versions of a program--to make sure it remains free
48 | software for all its users. We, the Free Software Foundation, use the
49 | GNU General Public License for most of our software; it applies also to
50 | any other work released this way by its authors. You can apply it to
51 | your programs, too.
52 |
53 | When we speak of free software, we are referring to freedom, not
54 | price. Our General Public Licenses are designed to make sure that you
55 | have the freedom to distribute copies of free software (and charge for
56 | them if you wish), that you receive source code or can get it if you
57 | want it, that you can change the software or use pieces of it in new
58 | free programs, and that you know you can do these things.
59 |
60 | To protect your rights, we need to prevent others from denying you
61 | these rights or asking you to surrender the rights. Therefore, you have
62 | certain responsibilities if you distribute copies of the software, or if
63 | you modify it: responsibilities to respect the freedom of others.
64 |
65 | For example, if you distribute copies of such a program, whether
66 | gratis or for a fee, you must pass on to the recipients the same
67 | freedoms that you received. You must make sure that they, too, receive
68 | or can get the source code. And you must show them these terms so they
69 | know their rights.
70 |
71 | Developers that use the GNU GPL protect your rights with two steps:
72 | (1) assert copyright on the software, and (2) offer you this License
73 | giving you legal permission to copy, distribute and/or modify it.
74 |
75 | For the developers' and authors' protection, the GPL clearly explains
76 | that there is no warranty for this free software. For both users' and
77 | authors' sake, the GPL requires that modified versions be marked as
78 | changed, so that their problems will not be attributed erroneously to
79 | authors of previous versions.
80 |
81 | Some devices are designed to deny users access to install or run
82 | modified versions of the software inside them, although the manufacturer
83 | can do so. This is fundamentally incompatible with the aim of
84 | protecting users' freedom to change the software. The systematic
85 | pattern of such abuse occurs in the area of products for individuals to
86 | use, which is precisely where it is most unacceptable. Therefore, we
87 | have designed this version of the GPL to prohibit the practice for those
88 | products. If such problems arise substantially in other domains, we
89 | stand ready to extend this provision to those domains in future versions
90 | of the GPL, as needed to protect the freedom of users.
91 |
92 | Finally, every program is threatened constantly by software patents.
93 | States should not allow patents to restrict development and use of
94 | software on general-purpose computers, but in those that do, we wish to
95 | avoid the special danger that patents applied to a free program could
96 | make it effectively proprietary. To prevent this, the GPL assures that
97 | patents cannot be used to render the program non-free.
98 |
99 | The precise terms and conditions for copying, distribution and
100 | modification follow.
101 |
102 | TERMS AND CONDITIONS
103 |
104 | 0. Definitions.
105 |
106 | "This License" refers to version 3 of the GNU General Public License.
107 |
108 | "Copyright" also means copyright-like laws that apply to other kinds of
109 | works, such as semiconductor masks.
110 |
111 | "The Program" refers to any copyrightable work licensed under this
112 | License. Each licensee is addressed as "you". "Licensees" and
113 | "recipients" may be individuals or organizations.
114 |
115 | To "modify" a work means to copy from or adapt all or part of the work
116 | in a fashion requiring copyright permission, other than the making of an
117 | exact copy. The resulting work is called a "modified version" of the
118 | earlier work or a work "based on" the earlier work.
119 |
120 | A "covered work" means either the unmodified Program or a work based
121 | on the Program.
122 |
123 | To "propagate" a work means to do anything with it that, without
124 | permission, would make you directly or secondarily liable for
125 | infringement under applicable copyright law, except executing it on a
126 | computer or modifying a private copy. Propagation includes copying,
127 | distribution (with or without modification), making available to the
128 | public, and in some countries other activities as well.
129 |
130 | To "convey" a work means any kind of propagation that enables other
131 | parties to make or receive copies. Mere interaction with a user through
132 | a computer network, with no transfer of a copy, is not conveying.
133 |
134 | An interactive user interface displays "Appropriate Legal Notices"
135 | to the extent that it includes a convenient and prominently visible
136 | feature that (1) displays an appropriate copyright notice, and (2)
137 | tells the user that there is no warranty for the work (except to the
138 | extent that warranties are provided), that licensees may convey the
139 | work under this License, and how to view a copy of this License. If
140 | the interface presents a list of user commands or options, such as a
141 | menu, a prominent item in the list meets this criterion.
142 |
143 | 1. Source Code.
144 |
145 | The "source code" for a work means the preferred form of the work
146 | for making modifications to it. "Object code" means any non-source
147 | form of a work.
148 |
149 | A "Standard Interface" means an interface that either is an official
150 | standard defined by a recognized standards body, or, in the case of
151 | interfaces specified for a particular programming language, one that
152 | is widely used among developers working in that language.
153 |
154 | The "System Libraries" of an executable work include anything, other
155 | than the work as a whole, that (a) is included in the normal form of
156 | packaging a Major Component, but which is not part of that Major
157 | Component, and (b) serves only to enable use of the work with that
158 | Major Component, or to implement a Standard Interface for which an
159 | implementation is available to the public in source code form. A
160 | "Major Component", in this context, means a major essential component
161 | (kernel, window system, and so on) of the specific operating system
162 | (if any) on which the executable work runs, or a compiler used to
163 | produce the work, or an object code interpreter used to run it.
164 |
165 | The "Corresponding Source" for a work in object code form means all
166 | the source code needed to generate, install, and (for an executable
167 | work) run the object code and to modify the work, including scripts to
168 | control those activities. However, it does not include the work's
169 | System Libraries, or general-purpose tools or generally available free
170 | programs which are used unmodified in performing those activities but
171 | which are not part of the work. For example, Corresponding Source
172 | includes interface definition files associated with source files for
173 | the work, and the source code for shared libraries and dynamically
174 | linked subprograms that the work is specifically designed to require,
175 | such as by intimate data communication or control flow between those
176 | subprograms and other parts of the work.
177 |
178 | The Corresponding Source need not include anything that users
179 | can regenerate automatically from other parts of the Corresponding
180 | Source.
181 |
182 | The Corresponding Source for a work in source code form is that
183 | same work.
184 |
185 | 2. Basic Permissions.
186 |
187 | All rights granted under this License are granted for the term of
188 | copyright on the Program, and are irrevocable provided the stated
189 | conditions are met. This License explicitly affirms your unlimited
190 | permission to run the unmodified Program. The output from running a
191 | covered work is covered by this License only if the output, given its
192 | content, constitutes a covered work. This License acknowledges your
193 | rights of fair use or other equivalent, as provided by copyright law.
194 |
195 | You may make, run and propagate covered works that you do not
196 | convey, without conditions so long as your license otherwise remains
197 | in force. You may convey covered works to others for the sole purpose
198 | of having them make modifications exclusively for you, or provide you
199 | with facilities for running those works, provided that you comply with
200 | the terms of this License in conveying all material for which you do
201 | not control copyright. Those thus making or running the covered works
202 | for you must do so exclusively on your behalf, under your direction
203 | and control, on terms that prohibit them from making any copies of
204 | your copyrighted material outside their relationship with you.
205 |
206 | Conveying under any other circumstances is permitted solely under
207 | the conditions stated below. Sublicensing is not allowed; section 10
208 | makes it unnecessary.
209 |
210 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
211 |
212 | No covered work shall be deemed part of an effective technological
213 | measure under any applicable law fulfilling obligations under article
214 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
215 | similar laws prohibiting or restricting circumvention of such
216 | measures.
217 |
218 | When you convey a covered work, you waive any legal power to forbid
219 | circumvention of technological measures to the extent such circumvention
220 | is effected by exercising rights under this License with respect to
221 | the covered work, and you disclaim any intention to limit operation or
222 | modification of the work as a means of enforcing, against the work's
223 | users, your or third parties' legal rights to forbid circumvention of
224 | technological measures.
225 |
226 | 4. Conveying Verbatim Copies.
227 |
228 | You may convey verbatim copies of the Program's source code as you
229 | receive it, in any medium, provided that you conspicuously and
230 | appropriately publish on each copy an appropriate copyright notice;
231 | keep intact all notices stating that this License and any
232 | non-permissive terms added in accord with section 7 apply to the code;
233 | keep intact all notices of the absence of any warranty; and give all
234 | recipients a copy of this License along with the Program.
235 |
236 | You may charge any price or no price for each copy that you convey,
237 | and you may offer support or warranty protection for a fee.
238 |
239 | 5. Conveying Modified Source Versions.
240 |
241 | You may convey a work based on the Program, or the modifications to
242 | produce it from the Program, in the form of source code under the
243 | terms of section 4, provided that you also meet all of these conditions:
244 |
245 | a) The work must carry prominent notices stating that you modified
246 | it, and giving a relevant date.
247 |
248 | b) The work must carry prominent notices stating that it is
249 | released under this License and any conditions added under section
250 | 7. This requirement modifies the requirement in section 4 to
251 | "keep intact all notices".
252 |
253 | c) You must license the entire work, as a whole, under this
254 | License to anyone who comes into possession of a copy. This
255 | License will therefore apply, along with any applicable section 7
256 | additional terms, to the whole of the work, and all its parts,
257 | regardless of how they are packaged. This License gives no
258 | permission to license the work in any other way, but it does not
259 | invalidate such permission if you have separately received it.
260 |
261 | d) If the work has interactive user interfaces, each must display
262 | Appropriate Legal Notices; however, if the Program has interactive
263 | interfaces that do not display Appropriate Legal Notices, your
264 | work need not make them do so.
265 |
266 | A compilation of a covered work with other separate and independent
267 | works, which are not by their nature extensions of the covered work,
268 | and which are not combined with it such as to form a larger program,
269 | in or on a volume of a storage or distribution medium, is called an
270 | "aggregate" if the compilation and its resulting copyright are not
271 | used to limit the access or legal rights of the compilation's users
272 | beyond what the individual works permit. Inclusion of a covered work
273 | in an aggregate does not cause this License to apply to the other
274 | parts of the aggregate.
275 |
276 | 6. Conveying Non-Source Forms.
277 |
278 | You may convey a covered work in object code form under the terms
279 | of sections 4 and 5, provided that you also convey the
280 | machine-readable Corresponding Source under the terms of this License,
281 | in one of these ways:
282 |
283 | a) Convey the object code in, or embodied in, a physical product
284 | (including a physical distribution medium), accompanied by the
285 | Corresponding Source fixed on a durable physical medium
286 | customarily used for software interchange.
287 |
288 | b) Convey the object code in, or embodied in, a physical product
289 | (including a physical distribution medium), accompanied by a
290 | written offer, valid for at least three years and valid for as
291 | long as you offer spare parts or customer support for that product
292 | model, to give anyone who possesses the object code either (1) a
293 | copy of the Corresponding Source for all the software in the
294 | product that is covered by this License, on a durable physical
295 | medium customarily used for software interchange, for a price no
296 | more than your reasonable cost of physically performing this
297 | conveying of source, or (2) access to copy the
298 | Corresponding Source from a network server at no charge.
299 |
300 | c) Convey individual copies of the object code with a copy of the
301 | written offer to provide the Corresponding Source. This
302 | alternative is allowed only occasionally and noncommercially, and
303 | only if you received the object code with such an offer, in accord
304 | with subsection 6b.
305 |
306 | d) Convey the object code by offering access from a designated
307 | place (gratis or for a charge), and offer equivalent access to the
308 | Corresponding Source in the same way through the same place at no
309 | further charge. You need not require recipients to copy the
310 | Corresponding Source along with the object code. If the place to
311 | copy the object code is a network server, the Corresponding Source
312 | may be on a different server (operated by you or a third party)
313 | that supports equivalent copying facilities, provided you maintain
314 | clear directions next to the object code saying where to find the
315 | Corresponding Source. Regardless of what server hosts the
316 | Corresponding Source, you remain obligated to ensure that it is
317 | available for as long as needed to satisfy these requirements.
318 |
319 | e) Convey the object code using peer-to-peer transmission, provided
320 | you inform other peers where the object code and Corresponding
321 | Source of the work are being offered to the general public at no
322 | charge under subsection 6d.
323 |
324 | A separable portion of the object code, whose source code is excluded
325 | from the Corresponding Source as a System Library, need not be
326 | included in conveying the object code work.
327 |
328 | A "User Product" is either (1) a "consumer product", which means any
329 | tangible personal property which is normally used for personal, family,
330 | or household purposes, or (2) anything designed or sold for incorporation
331 | into a dwelling. In determining whether a product is a consumer product,
332 | doubtful cases shall be resolved in favor of coverage. For a particular
333 | product received by a particular user, "normally used" refers to a
334 | typical or common use of that class of product, regardless of the status
335 | of the particular user or of the way in which the particular user
336 | actually uses, or expects or is expected to use, the product. A product
337 | is a consumer product regardless of whether the product has substantial
338 | commercial, industrial or non-consumer uses, unless such uses represent
339 | the only significant mode of use of the product.
340 |
341 | "Installation Information" for a User Product means any methods,
342 | procedures, authorization keys, or other information required to install
343 | and execute modified versions of a covered work in that User Product from
344 | a modified version of its Corresponding Source. The information must
345 | suffice to ensure that the continued functioning of the modified object
346 | code is in no case prevented or interfered with solely because
347 | modification has been made.
348 |
349 | If you convey an object code work under this section in, or with, or
350 | specifically for use in, a User Product, and the conveying occurs as
351 | part of a transaction in which the right of possession and use of the
352 | User Product is transferred to the recipient in perpetuity or for a
353 | fixed term (regardless of how the transaction is characterized), the
354 | Corresponding Source conveyed under this section must be accompanied
355 | by the Installation Information. But this requirement does not apply
356 | if neither you nor any third party retains the ability to install
357 | modified object code on the User Product (for example, the work has
358 | been installed in ROM).
359 |
360 | The requirement to provide Installation Information does not include a
361 | requirement to continue to provide support service, warranty, or updates
362 | for a work that has been modified or installed by the recipient, or for
363 | the User Product in which it has been modified or installed. Access to a
364 | network may be denied when the modification itself materially and
365 | adversely affects the operation of the network or violates the rules and
366 | protocols for communication across the network.
367 |
368 | Corresponding Source conveyed, and Installation Information provided,
369 | in accord with this section must be in a format that is publicly
370 | documented (and with an implementation available to the public in
371 | source code form), and must require no special password or key for
372 | unpacking, reading or copying.
373 |
374 | 7. Additional Terms.
375 |
376 | "Additional permissions" are terms that supplement the terms of this
377 | License by making exceptions from one or more of its conditions.
378 | Additional permissions that are applicable to the entire Program shall
379 | be treated as though they were included in this License, to the extent
380 | that they are valid under applicable law. If additional permissions
381 | apply only to part of the Program, that part may be used separately
382 | under those permissions, but the entire Program remains governed by
383 | this License without regard to the additional permissions.
384 |
385 | When you convey a copy of a covered work, you may at your option
386 | remove any additional permissions from that copy, or from any part of
387 | it. (Additional permissions may be written to require their own
388 | removal in certain cases when you modify the work.) You may place
389 | additional permissions on material, added by you to a covered work,
390 | for which you have or can give appropriate copyright permission.
391 |
392 | Notwithstanding any other provision of this License, for material you
393 | add to a covered work, you may (if authorized by the copyright holders of
394 | that material) supplement the terms of this License with terms:
395 |
396 | a) Disclaiming warranty or limiting liability differently from the
397 | terms of sections 15 and 16 of this License; or
398 |
399 | b) Requiring preservation of specified reasonable legal notices or
400 | author attributions in that material or in the Appropriate Legal
401 | Notices displayed by works containing it; or
402 |
403 | c) Prohibiting misrepresentation of the origin of that material, or
404 | requiring that modified versions of such material be marked in
405 | reasonable ways as different from the original version; or
406 |
407 | d) Limiting the use for publicity purposes of names of licensors or
408 | authors of the material; or
409 |
410 | e) Declining to grant rights under trademark law for use of some
411 | trade names, trademarks, or service marks; or
412 |
413 | f) Requiring indemnification of licensors and authors of that
414 | material by anyone who conveys the material (or modified versions of
415 | it) with contractual assumptions of liability to the recipient, for
416 | any liability that these contractual assumptions directly impose on
417 | those licensors and authors.
418 |
419 | All other non-permissive additional terms are considered "further
420 | restrictions" within the meaning of section 10. If the Program as you
421 | received it, or any part of it, contains a notice stating that it is
422 | governed by this License along with a term that is a further
423 | restriction, you may remove that term. If a license document contains
424 | a further restriction but permits relicensing or conveying under this
425 | License, you may add to a covered work material governed by the terms
426 | of that license document, provided that the further restriction does
427 | not survive such relicensing or conveying.
428 |
429 | If you add terms to a covered work in accord with this section, you
430 | must place, in the relevant source files, a statement of the
431 | additional terms that apply to those files, or a notice indicating
432 | where to find the applicable terms.
433 |
434 | Additional terms, permissive or non-permissive, may be stated in the
435 | form of a separately written license, or stated as exceptions;
436 | the above requirements apply either way.
437 |
438 | 8. Termination.
439 |
440 | You may not propagate or modify a covered work except as expressly
441 | provided under this License. Any attempt otherwise to propagate or
442 | modify it is void, and will automatically terminate your rights under
443 | this License (including any patent licenses granted under the third
444 | paragraph of section 11).
445 |
446 | However, if you cease all violation of this License, then your
447 | license from a particular copyright holder is reinstated (a)
448 | provisionally, unless and until the copyright holder explicitly and
449 | finally terminates your license, and (b) permanently, if the copyright
450 | holder fails to notify you of the violation by some reasonable means
451 | prior to 60 days after the cessation.
452 |
453 | Moreover, your license from a particular copyright holder is
454 | reinstated permanently if the copyright holder notifies you of the
455 | violation by some reasonable means, this is the first time you have
456 | received notice of violation of this License (for any work) from that
457 | copyright holder, and you cure the violation prior to 30 days after
458 | your receipt of the notice.
459 |
460 | Termination of your rights under this section does not terminate the
461 | licenses of parties who have received copies or rights from you under
462 | this License. If your rights have been terminated and not permanently
463 | reinstated, you do not qualify to receive new licenses for the same
464 | material under section 10.
465 |
466 | 9. Acceptance Not Required for Having Copies.
467 |
468 | You are not required to accept this License in order to receive or
469 | run a copy of the Program. Ancillary propagation of a covered work
470 | occurring solely as a consequence of using peer-to-peer transmission
471 | to receive a copy likewise does not require acceptance. However,
472 | nothing other than this License grants you permission to propagate or
473 | modify any covered work. These actions infringe copyright if you do
474 | not accept this License. Therefore, by modifying or propagating a
475 | covered work, you indicate your acceptance of this License to do so.
476 |
477 | 10. Automatic Licensing of Downstream Recipients.
478 |
479 | Each time you convey a covered work, the recipient automatically
480 | receives a license from the original licensors, to run, modify and
481 | propagate that work, subject to this License. You are not responsible
482 | for enforcing compliance by third parties with this License.
483 |
484 | An "entity transaction" is a transaction transferring control of an
485 | organization, or substantially all assets of one, or subdividing an
486 | organization, or merging organizations. If propagation of a covered
487 | work results from an entity transaction, each party to that
488 | transaction who receives a copy of the work also receives whatever
489 | licenses to the work the party's predecessor in interest had or could
490 | give under the previous paragraph, plus a right to possession of the
491 | Corresponding Source of the work from the predecessor in interest, if
492 | the predecessor has it or can get it with reasonable efforts.
493 |
494 | You may not impose any further restrictions on the exercise of the
495 | rights granted or affirmed under this License. For example, you may
496 | not impose a license fee, royalty, or other charge for exercise of
497 | rights granted under this License, and you may not initiate litigation
498 | (including a cross-claim or counterclaim in a lawsuit) alleging that
499 | any patent claim is infringed by making, using, selling, offering for
500 | sale, or importing the Program or any portion of it.
501 |
502 | 11. Patents.
503 |
504 | A "contributor" is a copyright holder who authorizes use under this
505 | License of the Program or a work on which the Program is based. The
506 | work thus licensed is called the contributor's "contributor version".
507 |
508 | A contributor's "essential patent claims" are all patent claims
509 | owned or controlled by the contributor, whether already acquired or
510 | hereafter acquired, that would be infringed by some manner, permitted
511 | by this License, of making, using, or selling its contributor version,
512 | but do not include claims that would be infringed only as a
513 | consequence of further modification of the contributor version. For
514 | purposes of this definition, "control" includes the right to grant
515 | patent sublicenses in a manner consistent with the requirements of
516 | this License.
517 |
518 | Each contributor grants you a non-exclusive, worldwide, royalty-free
519 | patent license under the contributor's essential patent claims, to
520 | make, use, sell, offer for sale, import and otherwise run, modify and
521 | propagate the contents of its contributor version.
522 |
523 | In the following three paragraphs, a "patent license" is any express
524 | agreement or commitment, however denominated, not to enforce a patent
525 | (such as an express permission to practice a patent or covenant not to
526 | sue for patent infringement). To "grant" such a patent license to a
527 | party means to make such an agreement or commitment not to enforce a
528 | patent against the party.
529 |
530 | If you convey a covered work, knowingly relying on a patent license,
531 | and the Corresponding Source of the work is not available for anyone
532 | to copy, free of charge and under the terms of this License, through a
533 | publicly available network server or other readily accessible means,
534 | then you must either (1) cause the Corresponding Source to be so
535 | available, or (2) arrange to deprive yourself of the benefit of the
536 | patent license for this particular work, or (3) arrange, in a manner
537 | consistent with the requirements of this License, to extend the patent
538 | license to downstream recipients. "Knowingly relying" means you have
539 | actual knowledge that, but for the patent license, your conveying the
540 | covered work in a country, or your recipient's use of the covered work
541 | in a country, would infringe one or more identifiable patents in that
542 | country that you have reason to believe are valid.
543 |
544 | If, pursuant to or in connection with a single transaction or
545 | arrangement, you convey, or propagate by procuring conveyance of, a
546 | covered work, and grant a patent license to some of the parties
547 | receiving the covered work authorizing them to use, propagate, modify
548 | or convey a specific copy of the covered work, then the patent license
549 | you grant is automatically extended to all recipients of the covered
550 | work and works based on it.
551 |
552 | A patent license is "discriminatory" if it does not include within
553 | the scope of its coverage, prohibits the exercise of, or is
554 | conditioned on the non-exercise of one or more of the rights that are
555 | specifically granted under this License. You may not convey a covered
556 | work if you are a party to an arrangement with a third party that is
557 | in the business of distributing software, under which you make payment
558 | to the third party based on the extent of your activity of conveying
559 | the work, and under which the third party grants, to any of the
560 | parties who would receive the covered work from you, a discriminatory
561 | patent license (a) in connection with copies of the covered work
562 | conveyed by you (or copies made from those copies), or (b) primarily
563 | for and in connection with specific products or compilations that
564 | contain the covered work, unless you entered into that arrangement,
565 | or that patent license was granted, prior to 28 March 2007.
566 |
567 | Nothing in this License shall be construed as excluding or limiting
568 | any implied license or other defenses to infringement that may
569 | otherwise be available to you under applicable patent law.
570 |
571 | 12. No Surrender of Others' Freedom.
572 |
573 | If conditions are imposed on you (whether by court order, agreement or
574 | otherwise) that contradict the conditions of this License, they do not
575 | excuse you from the conditions of this License. If you cannot convey a
576 | covered work so as to satisfy simultaneously your obligations under this
577 | License and any other pertinent obligations, then as a consequence you may
578 | not convey it at all. For example, if you agree to terms that obligate you
579 | to collect a royalty for further conveying from those to whom you convey
580 | the Program, the only way you could satisfy both those terms and this
581 | License would be to refrain entirely from conveying the Program.
582 |
583 | 13. Use with the GNU Affero General Public License.
584 |
585 | Notwithstanding any other provision of this License, you have
586 | permission to link or combine any covered work with a work licensed
587 | under version 3 of the GNU Affero General Public License into a single
588 | combined work, and to convey the resulting work. The terms of this
589 | License will continue to apply to the part which is the covered work,
590 | but the special requirements of the GNU Affero General Public License,
591 | section 13, concerning interaction through a network will apply to the
592 | combination as such.
593 |
594 | 14. Revised Versions of this License.
595 |
596 | The Free Software Foundation may publish revised and/or new versions of
597 | the GNU General Public License from time to time. Such new versions will
598 | be similar in spirit to the present version, but may differ in detail to
599 | address new problems or concerns.
600 |
601 | Each version is given a distinguishing version number. If the
602 | Program specifies that a certain numbered version of the GNU General
603 | Public License "or any later version" applies to it, you have the
604 | option of following the terms and conditions either of that numbered
605 | version or of any later version published by the Free Software
606 | Foundation. If the Program does not specify a version number of the
607 | GNU General Public License, you may choose any version ever published
608 | by the Free Software Foundation.
609 |
610 | If the Program specifies that a proxy can decide which future
611 | versions of the GNU General Public License can be used, that proxy's
612 | public statement of acceptance of a version permanently authorizes you
613 | to choose that version for the Program.
614 |
615 | Later license versions may give you additional or different
616 | permissions. However, no additional obligations are imposed on any
617 | author or copyright holder as a result of your choosing to follow a
618 | later version.
619 |
620 | 15. Disclaimer of Warranty.
621 |
622 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
623 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
624 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
625 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
626 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
627 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
628 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
629 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
630 |
631 | 16. Limitation of Liability.
632 |
633 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
634 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
635 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
636 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
637 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
638 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
639 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
640 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
641 | SUCH DAMAGES.
642 |
643 | 17. Interpretation of Sections 15 and 16.
644 |
645 | If the disclaimer of warranty and limitation of liability provided
646 | above cannot be given local legal effect according to their terms,
647 | reviewing courts shall apply local law that most closely approximates
648 | an absolute waiver of all civil liability in connection with the
649 | Program, unless a warranty or assumption of liability accompanies a
650 | copy of the Program in return for a fee.
651 |
652 | END OF TERMS AND CONDITIONS
653 |
654 | How to Apply These Terms to Your New Programs
655 |
656 | If you develop a new program, and you want it to be of the greatest
657 | possible use to the public, the best way to achieve this is to make it
658 | free software which everyone can redistribute and change under these terms.
659 |
660 | To do so, attach the following notices to the program. It is safest
661 | to attach them to the start of each source file to most effectively
662 | state the exclusion of warranty; and each file should have at least
663 | the "copyright" line and a pointer to where the full notice is found.
664 |
665 | <one line to give the program's name and a brief idea of what it does.>
666 | Copyright (C) <year> <name of author>
667 |
668 | This program is free software: you can redistribute it and/or modify
669 | it under the terms of the GNU General Public License as published by
670 | the Free Software Foundation, either version 3 of the License, or
671 | (at your option) any later version.
672 |
673 | This program is distributed in the hope that it will be useful,
674 | but WITHOUT ANY WARRANTY; without even the implied warranty of
675 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
676 | GNU General Public License for more details.
677 |
678 | You should have received a copy of the GNU General Public License
679 | along with this program. If not, see <https://www.gnu.org/licenses/>.
680 |
681 | Also add information on how to contact you by electronic and paper mail.
682 |
683 | If the program does terminal interaction, make it output a short
684 | notice like this when it starts in an interactive mode:
685 |
686 | <program> Copyright (C) <year> <name of author>
687 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
688 | This is free software, and you are welcome to redistribute it
689 | under certain conditions; type `show c' for details.
690 |
691 | The hypothetical commands `show w' and `show c' should show the appropriate
692 | parts of the General Public License. Of course, your program's commands
693 | might be different; for a GUI interface, you would use an "about box".
694 |
695 | You should also get your employer (if you work as a programmer) or school,
696 | if any, to sign a "copyright disclaimer" for the program, if necessary.
697 | For more information on this, and how to apply and follow the GNU GPL, see
698 | <https://www.gnu.org/licenses/>.
699 |
700 | The GNU General Public License does not permit incorporating your program
701 | into proprietary programs. If your program is a subroutine library, you
702 | may consider it more useful to permit linking proprietary applications with
703 | the library. If this is what you want to do, use the GNU Lesser General
704 | Public License instead of this License. But first, please read
705 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
706 |
707 | ### STAR ######################################################################
708 |
709 | MIT License
710 |
711 | Copyright (c) 2019 Alexander Dobin
712 |
713 | Permission is hereby granted, free of charge, to any person obtaining a copy
714 | of this software and associated documentation files (the "Software"), to deal
715 | in the Software without restriction, including without limitation the rights
716 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
717 | copies of the Software, and to permit persons to whom the Software is
718 | furnished to do so, subject to the following conditions:
719 |
720 | The above copyright notice and this permission notice shall be included in all
721 | copies or substantial portions of the Software.
722 |
723 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
724 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
725 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
726 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
727 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
728 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
729 | SOFTWARE.
730 |
--------------------------------------------------------------------------------