├── .gitignore
├── rtg
│   ├── RTG.jar
│   ├── LICENSE.txt
│   ├── rtg.cfg
│   └── rtg
├── k8-0.2.2.tar.bz2
├── .gitmodules
├── Makefile
├── README.md
└── run-eval

/.gitignore:
--------------------------------------------------------------------------------
.*.swp
*.o

--------------------------------------------------------------------------------
/rtg/RTG.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lh3/rtgeval/HEAD/rtg/RTG.jar

--------------------------------------------------------------------------------
/k8-0.2.2.tar.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lh3/rtgeval/HEAD/k8-0.2.2.tar.bz2

--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "htsbox"]
	path = htsbox
	url = https://github.com/lh3/htsbox.git
[submodule "bgt"]
	path = bgt
	url = https://github.com/lh3/bgt.git
[submodule "hapdip"]
	path = hapdip
	url = https://github.com/lh3/hapdip

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Assembles the self-contained rtgeval.kit/ directory: the htsbox and bgt
# binaries built from their submodules, hapdip.js, a prebuilt k8 binary for the
# current OS, the RTG distribution (jar, launcher, config, licence) and the
# run-eval wrapper script.

# Submodules that have their own Makefile and are built recursively.
SUBDIRS=htsbox bgt

# run-flt is packaged only when the script exists in the source tree, so that
# its absence does not make the default goal fail.
RUN_FLT=$(if $(wildcard run-flt),rtgeval.kit/run-flt)

all:rtgeval.kit/htsbox rtgeval.kit/bgt rtgeval.kit/hapdip.js rtgeval.kit/k8 \
	rtgeval.kit/RTG.jar rtgeval.kit/run-eval $(RUN_FLT) \
	rtgeval.kit/RTG-LICENSE.txt

rtgeval.kit:
	mkdir -p $@

# Run goal "all" or "clean" (this target's name minus "-recur") in every
# submodule.  $(MAKE) -C is used rather than "cd dir; $(MAKE)": with a plain
# cd, a missing submodule directory would leave the shell in this directory
# and re-invoke this very Makefile.
all-recur clean-recur:
	@target=`echo $@ | sed s/-recur//`; \
	list='$(SUBDIRS)'; for subdir in $$list; do \
		$(MAKE) -C $$subdir $$target || exit 1; \
	done

# Phony: builds the submodules and makes sure the kit directory exists.  Every
# copy rule below depends on it, so the kit files are refreshed on each run.
prepare:all-recur rtgeval.kit

rtgeval.kit/htsbox:prepare
	cp htsbox/htsbox $@ && strip $@

rtgeval.kit/bgt:prepare
	cp bgt/bgt $@ && strip $@

rtgeval.kit/hapdip.js:prepare
	cp hapdip/hapdip.js $@

# Extract only the k8 binary matching the lower-cased `uname -s` and rename it
# to plain "k8".  The kit directory is an order-only prerequisite (its mtime
# changes whenever a file is added to it), and the binary is touched because
# tar restores the archive's older timestamp.
rtgeval.kit/k8:k8-0.2.2.tar.bz2 | rtgeval.kit
	(cd rtgeval.kit && k8bin=k8-`uname -s|tr '[A-Z]' '[a-z]'` && tar -jxf ../$< "$$k8bin" && mv "$$k8bin" k8 && touch k8)

# Copies the jar together with the rtg launcher script and its configuration.
rtgeval.kit/RTG.jar:prepare
	cp rtg/RTG.jar rtg/rtg rtg/rtg.cfg rtgeval.kit

rtgeval.kit/run-eval:prepare
	cp run-eval $@

rtgeval.kit/run-flt:prepare
	cp run-flt $@

rtgeval.kit/RTG-LICENSE.txt:prepare
	cp rtg/LICENSE.txt $@

clean:clean-recur
	rm -fr rtgeval.kit

.PHONY: all all-recur clean-recur prepare clean

--------------------------------------------------------------------------------
/rtg/LICENSE.txt:
--------------------------------------------------------------------------------
The Simplified BSD License

Copyright (c) 2015, Real Time Genomics Limited
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the
   distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Getting Started 2 | 3 | ```sh 4 | wget -O- https://github.com/lh3/rtgeval/releases/download/v0.1/rtgevalkit-0.1_x64-linux.tar.bz2 | tar -jxf - 5 | [ -d ref.sdf ] || rtgeval.kit/rtg format -o ref.sdf ref.fa # create RTG index 6 | rtgeval.kit/run-eval -s ref.sdf -b confident.bed truth.vcf.gz test.vcf.gz 7 | cat test.re.eval 8 | ``` 9 | where `confident.bed` gives confident regions, `truth.vcf.gz` the truth VCF and 10 | `test.vcf.gz` the VCF to evaluate. `test.re.eval` gives the true positives 11 | (TP), false negatives (FN) and false positives (FP) measured in three ways, 12 | which will be explained below. 13 | 14 | For the CHM1-CHM13 benchmark data, it is recommended to invoke `rtgeval` with 15 | ```sh 16 | rtgeval.kit/run-eval -s ref.sdf -b conf.bed -l2 -L100 -p hrun.bed truth.vcf.gz test.vcf.gz 17 | ``` 18 | This ignores 1bp INDELs and homopolymer INDELs. 19 | 20 | ## Introduction 21 | 22 | This repo implements a wrapper for [RTG's vcfeval][vcfeval], a sophisticated 23 | open source variant comparison tool developed by [Realtime Genomics][rtg]. It 24 | simplifies the use of `vcfeval` and potentially helps to get consistent results 25 | given VCFs produced by different variant callers. 
26 | 27 | The wrapper calls [bgt][bgt] to decompose complex and multi-allelic variants to 28 | the smallest possible alleles, optionally filters each allele and regroups 29 | overlapping multi-alleles into one VCF line (apparently `rtg vcfeval` works 30 | better this way). The wrapper then calls [hapdip][hapdip] and `rtg` to perform 31 | three types of evaluations: 32 | 33 | 1. Positional. With this approach, a TP is a true variant that is within 10bp 34 | around a called variant; a FN is a true variant that is not within 10bp; a 35 | FP is a called variant that is not within 10bp around a true variant. FN 36 | and FP intervals can be found in `PREFIX.p_err.bed.gz`. 37 | 38 | 2. Allelic. The wrapper calls `rtg vcfeval --squash-ploidy`. For biallelic 39 | variants, this type of evaluation measures if an allele from one VCF is 40 | confirmed in the other VCF, disregarding genotypes. The behavior for 41 | multi-allelic variants is unclear. FN and FP calls are reported in 42 | `PREFIX.a/f?.vcf.gz`. 43 | 44 | 3. Genotypic. The wrapper calls `rtg vcfeval`. Genotype errors are also 45 | counted. This is the most stringent type of evaluation. False calls are 46 | reported in `PREFIX.g/f?.vcf.gz`. 47 | 48 | [rtg]: http://www.realtimegenomics.com 49 | [bgt]: https://github.com/lh3/bgt 50 | [vcfeval]: http://realtimegenomics.com/products/rtg-tools/ 51 | [hapdip]: https://github.com/lh3/hapdip 52 | -------------------------------------------------------------------------------- /rtg/rtg.cfg: -------------------------------------------------------------------------------- 1 | # Configuration file for RTG. 2 | 3 | # This file is automatically created upon first run, and may be edited 4 | # to allow some customization of default behaviour. 
In a 5 | # multi-machine environment, this file can be used to achieve 6 | # machine-specific configuration either by placing in /etc/rtg.cfg on 7 | # each machine or in the rtg installation directory values with the 8 | # name rtg.$(hostname -s).cfg, e.g. rtg.gsa6.cfg 9 | 10 | 11 | # The path to the java executable (need not be full path if in $PATH). 12 | # If unset, RTG will use the bundled JRE if present, otherwise 13 | # will expect java to be on $PATH 14 | # RTG_JAVA="java" 15 | 16 | # Amount of memory to allocate to RTG. Use G suffix for gigabytes. 17 | # If unset, allow RTG to use up to 90% of available RAM (see RTG_MEM_PCT) 18 | RTG_MEM="4G" 19 | # Note that memory allocation can also be overridden on a per-command basis, e.g: 20 | # $ rtg RTG_MEM=24G population ... 21 | 22 | # If RTG_MEM is unset, use this percentage of total RAM. 23 | # The default is 90 percent. 24 | # RTG_MEM_PCT=90 25 | 26 | 27 | # Attempt to send crash logs to Real Time Genomics, true to enable, false to disable 28 | RTG_TALKBACK=true 29 | 30 | # Enable simple usage logging, true to enable. Default is no logging. 31 | RTG_USAGE= 32 | 33 | # Server URL when usage logging to a server. Default is to use RTG hosted server. 34 | # RTG_USAGE_HOST= 35 | 36 | # If performing single-user file-based usage logging, this specifies the directory to log to. 37 | # RTG_USAGE_DIR= 38 | 39 | # List of optional fields to add to usage logging (when enabled). 40 | # If unset do not add any of these fields. (commandline may contain information 41 | # considered sensitive) 42 | # RTG_USAGE_OPTIONAL=username,hostname,commandline 43 | RTG_USAGE_OPTIONAL=username,hostname 44 | 45 | 46 | # Allows specification of the HTTP proxy to use for 47 | # talkback/usage, specified in host:port form. 48 | # If unset, assume no http proxy is required. 49 | # RTG_PROXY=my.proxy.host:3128 50 | 51 | 52 | # Directory in which to look for pipeline reference datasets. 
# If unset, uses the references subdirectory of this installation.
# RTG_REFERENCES_DIR=

# Directory in which to look for AVR models.
# If unset, uses the models subdirectory of this installation.
# RTG_MODELS_DIR=

# Allows passing additional arguments to the JVM, e.g.:
# RTG_JAVA_OPTS="-Djava.io.tmpdir=XXYY -XX:+UseLargePages"


# Set the number of threads to use when not otherwise specified via command line flags.
# The default behavior is to allocate one thread per machine core.
RTG_DEFAULT_THREADS=4



--------------------------------------------------------------------------------
/run-eval:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
#
# run-eval: compare a test VCF against a truth VCF and write a summary to
# PREFIX.eval.  A positional evaluation (hapdip.js distEval) is always run;
# when an RTG SDF reference is given with -s, both VCFs are additionally
# normalised with bgt/hapdip.js and compared with `rtg vcfeval`, once with
# --squash-ploidy ("allelic") and once without ("genotypic").
# The helper programs k8, hapdip.js, htsbox, bgt and rtg are looked up in the
# directory that contains this script.

use strict;
use warnings;
use Getopt::Std;

my %opts = ();
getopts('b:p:o:l:L:t:s:', \%opts);

# check path: use the directory part of $0 when it has one, otherwise find the
# script in $PATH and use the directory it was found in
my $exepath = $0 =~/^\S+\/[^\/\s]+/? $0 : &which($0);
my $root = $0 =~/^(\S+)\/[^\/\s]+/? $1 : undef;
$root = $1 if !defined($root) && defined($exepath) && $exepath =~/^(\S+)\/[^\/\s]+/;
die "ERROR: failed to locate the root directory\n" if !defined($root);

die("Usage: run-eval [options] <truth.vcf> <test.vcf>
Options:
  -o STR    output prefix [auto]
  -b FILE   confident regions in BED [null]
  -p FILE   homopolymer positions generated by 'seqtk hrun' [null]
  -t FILE   list of contig name and length [null]
  -l INT    min INDEL length [0]
  -L INT    max INDEL length [inf]
  -s DIR    RTG's SDF reference directory (generated by 'rtg format') [null]
") if @ARGV < 2;

# infer prefix: the test VCF name minus its .vcf or .vcf.gz suffix, plus ".re"
my $prefix;
if (defined $opts{o}) {
    $prefix = $opts{o};
} elsif ($ARGV[1] =~ /\.vcf(\.gz)?$/) {
    $prefix = $ARGV[1];
    $prefix =~ s/\.vcf(\.gz)?$//;
    $prefix .= ".re";
}
die "ERROR: failed to infer the prefix for output. Please specify -o.\n" unless defined($prefix);

# test ##contig in VCF header; a contig list is only mandatory when neither
# header provides one and the user did not pass -t
my @ctg_truth = &test_contig($ARGV[0]);
my @ctg_test = &test_contig($ARGV[1]);
die "ERROR: failed to find ##contig lines in both VCFs. Please specify -t.\n"
    if (@ctg_truth == 0 && @ctg_test == 0 && !defined($opts{t}));
# when exactly one VCF has contig lines, save them so they can be supplied
# (via -t) when normalising the VCF that lacks them
if (@ctg_test == 0 && @ctg_truth != 0) {
    &print_ctg(\@ctg_truth, "$prefix.ctg");
} elsif (@ctg_test != 0 && @ctg_truth == 0) {
    &print_ctg(\@ctg_test, "$prefix.ctg");
}
$opts{t} = "$prefix.ctg" unless defined($opts{t});

open(my $out, '>', "$prefix.eval") || die "ERROR: failed to write $prefix.eval: $!\n";

warn "Evaluating positional accuracy...\n";
my $de_opt = "";
$de_opt .= " -l $opts{l}" if defined($opts{l});
$de_opt .= " -L $opts{L}" if defined($opts{L});
$de_opt .= " -b $opts{b}" if defined($opts{b});
$de_opt .= " -p $opts{p}" if defined($opts{p});
&run_cmd("($root/k8 $root/hapdip.js distEval $de_opt $ARGV[0] $ARGV[1] > $prefix.de) 2>> $prefix.log");
&run_cmd("($root/k8 $root/hapdip.js distEval -e $de_opt $ARGV[0] $ARGV[1] | $root/htsbox bgzip > $prefix.p_err.bed.gz) 2>> $prefix.log");

# copy the distEval report into the summary, relabelled as "positional"
open(my $de, '<', "$prefix.de") || die "ERROR: failed to read $prefix.de: $!\n";
while (my $line = <$de>) {
    $line =~ s/^distEval/positional/;
    print {$out} $line;
}
close($de);

if (defined $opts{s}) {
    # existing intermediate files and vcfeval output directories are reused
    &normalize_vcf("truth", $ARGV[0], "$prefix.truth.vcf.gz", scalar(@ctg_truth));
    &normalize_vcf("test", $ARGV[1], "$prefix.test.vcf.gz", scalar(@ctg_test));

    unless (-d "$prefix.a") {
        warn "Evaluating allelic accuracy...\n";
        &run_cmd("($root/rtg vcfeval -t $opts{s} -b $prefix.truth.vcf.gz -c $prefix.test.vcf.gz -o $prefix.a --squash-ploidy) 2>> $prefix.log");
    }

    unless (-d "$prefix.g") {
        warn "Evaluating genotypic accuracy...\n";
        &run_cmd("($root/rtg vcfeval -t $opts{s} -b $prefix.truth.vcf.gz -c $prefix.test.vcf.gz -o $prefix.g) 2>> $prefix.log");
    }

    warn "Counting...\n";
    my $cnt_opt = '';
    $cnt_opt .= " -l $opts{l}" if defined($opts{l});
    $cnt_opt .= " -L $opts{L}" if defined($opts{L});
    $cnt_opt .= " -b $opts{b}" if defined($opts{b});
    $cnt_opt .= " -p 1" if defined($opts{p});

    # each list is (SNP count, INDEL count)
    my @a_tp0 = &get_cnt($root, $cnt_opt, "$prefix.a/tp-baseline.vcf.gz");
    my @a_tp1 = &get_cnt($root, $cnt_opt, "$prefix.a/tp.vcf.gz");
    my @a_fn = &get_cnt($root, $cnt_opt, "$prefix.a/fn.vcf.gz", "$prefix.a_fn.vcf.gz");
    my @a_fp = &get_cnt($root, $cnt_opt, "$prefix.a/fp.vcf.gz", "$prefix.a_fp.vcf.gz");
    my @g_tp0 = &get_cnt($root, $cnt_opt, "$prefix.g/tp-baseline.vcf.gz");
    my @g_tp1 = &get_cnt($root, $cnt_opt, "$prefix.g/tp.vcf.gz");
    my @g_fn = &get_cnt($root, $cnt_opt, "$prefix.g/fn.vcf.gz", "$prefix.g_fn.vcf.gz");
    my @g_fp = &get_cnt($root, $cnt_opt, "$prefix.g/fp.vcf.gz", "$prefix.g_fp.vcf.gz");

    &print_eval($out, "allelic", \@a_tp0, \@a_tp1, \@a_fn, \@a_fp);
    &print_eval($out, "genotypic", \@g_tp0, \@g_tp1, \@g_fn, \@g_fp);
}
close($out);

# Run a shell command; report a failure on stderr but keep going, so that a
# failing step does not discard results that were already produced.
sub run_cmd {
    my $cmd = shift;
    my $status = system($cmd);
    warn "WARNING: command failed (status $status): $cmd\n" if $status != 0;
    return $status;
}

# Return the full path of the first executable named $file in a colon-separated
# search path (default: $PATH), or nothing when it is not found.
sub which {
    my $file = shift;
    my $path = (@_)? shift : $ENV{PATH};
    return if (!defined($path));
    foreach my $x (split(":", $path)) {
        $x =~ s/\/$//;
        return "$x/$file" if (-x "$x/$file");
    }
    return;
}

# Read the header of a plain or gzip'd VCF and return a list of [name, length]
# pairs, one per ##contig line that carries both ID= and length=.
sub test_contig {
    my $fn = shift;
    my @ctg = ();
    my $fh;
    if ($fn =~ /\.gz$/) {
        # list form: the file name is passed to gzip without going through a shell
        open($fh, '-|', 'gzip', '-dc', $fn) || die "ERROR: failed to run gzip on $fn: $!\n";
    } else {
        open($fh, '<', $fn) || die "ERROR: failed to read $fn: $!\n";
    }
    while (my $line = <$fh>) {
        last unless $line =~ /^#/;      # the header ends at the first record
        next unless $line =~ /^##contig/;
        my ($id, $len);
        $id = $1 if $line =~ /ID=([^\s,]+)/;
        $len = $1 if $line =~ /length=(\d+)/;
        push(@ctg, [$id, $len]) if defined($id) && defined($len);
    }
    close($fh);
    return @ctg;
}

# Write the contig list (array ref of [name, length]) to $fn as "name<TAB>length".
sub print_ctg {
    my $ctg = shift;
    my $fn = shift;
    open(my $fh, '>', $fn) || die "ERROR: failed to write $fn: $!\n";
    for (@$ctg) {
        print {$fh} "$_->[0]\t$_->[1]\n";
    }
    close($fh);
}

# Atomize one input VCF into $out_fn (skipped when $out_fn already exists) and
# (re)build its tabix index.  The contig list -t is passed to bgt only when the
# VCF header has no ##contig lines of its own ($has_ctg is false).
sub normalize_vcf {
    my ($label, $in, $out_fn, $has_ctg) = @_;
    unless (-f $out_fn) {
        warn "Normalizing the $label VCF...\n";
        my $ot = $has_ctg? '' : "-t $opts{t}";
        my $op = defined($opts{p})? "-p $opts{p}" : "";
        &run_cmd("($root/bgt atomize -S $ot $in | $root/k8 $root/hapdip.js atompost $op /dev/stdin | $root/htsbox bgzip > $out_fn) 2>> $prefix.log");
    }
    &run_cmd("$root/htsbox tabix -fpvcf $out_fn");
}

# Run `hapdip.js atomcnt` on $fn and return the two leading integers of its
# first output line (SNP and INDEL counts); returns an empty list when the
# output cannot be parsed.  When $out is given, the records selected with
# `atomcnt -P` are additionally written to $out, bgzip-compressed.
sub get_cnt {
    my $root = shift;
    my $cnt_opt = shift;
    my $fn = shift;
    my $out = shift;
    my @cnt;
    open(my $fh, '-|', "$root/k8 $root/hapdip.js atomcnt $cnt_opt $fn") || die "ERROR: failed to run atomcnt on $fn: $!\n";
    my $line = <$fh>;
    @cnt = ($1, $2) if (defined($line) && $line =~ /^(\d+)\s+(\d+)/);
    close($fh);
    warn "WARNING: failed to count the variants in $fn\n" unless @cnt;
    &run_cmd("$root/k8 $root/hapdip.js atomcnt $cnt_opt -P $fn | $root/htsbox bgzip > $out") if defined($out);
    return @cnt;
}

# Error rate in percent with two decimals: 100 * err / (err + tp).
# Returns "NA" when a count is missing or there are no calls at all.
sub pct {
    my ($err, $tp) = @_;
    return "NA" if !defined($err) || !defined($tp) || $err + $tp == 0;
    return sprintf("%.2f", 100 * $err / ($err + $tp));
}

# Print FN, FP, %FNR and %FDR for SNPs and then for INDELs.  The last four
# arguments are array refs holding (SNP, INDEL) counts: baseline true
# positives, called true positives, false negatives and false positives.
sub print_eval {
    my ($fh, $label, $tp0, $tp1, $fn, $fp) = @_;
    my @type = ('SNP', 'INDEL');
    for my $i (0 .. $#type) {
        my $n_fn = defined($fn->[$i])? $fn->[$i] : "NA";
        my $n_fp = defined($fp->[$i])? $fp->[$i] : "NA";
        print {$fh} "$label\t$type[$i]\tFN\t$n_fn\n";
        print {$fh} "$label\t$type[$i]\tFP\t$n_fp\n";
        print {$fh} "$label\t$type[$i]\t%FNR\t", &pct($fn->[$i], $tp0->[$i]), "\n";
        print {$fh} "$label\t$type[$i]\t%FDR\t", &pct($fp->[$i], $tp1->[$i]), "\n";
    }
}

--------------------------------------------------------------------------------
/rtg/rtg:
--------------------------------------------------------------------------------
#!/bin/bash

# Pre-flight safety-belts
if [ "$(uname -s)" != "Linux" ] && [ "$(uname -s)" != "Darwin" ]; then
    # If you comment this check out you are on your own :-)
    echo "Sorry, only Linux and MacOS are supported."
    exit 1
elif [ "$(uname -m)" != "x86_64" ]; then
    # If you comment this check out you are on your own :-)
    echo "Sorry, you must be running a 64bit operating system."
11 | exit 1 12 | fi 13 | 14 | if [ "$(uname -s)" == "Darwin" ]; then 15 | # Determine the directory that this script is installed in 16 | # MacOS X doesn't support readlink -f, so do it slow and crufty 17 | THIS_DIR="$(cd "$(dirname "$0")"; TARGET=$(basename "$0"); while [ -L "$TARGET" ]; do TARGET="$(readlink "$TARGET")"; cd $(dirname "$TARGET"); TARGET="$(basename "$TARGET")"; done; pwd -P)" 18 | else 19 | # Determine the canonical directory that this script is installed in, using readlink -f if possible 20 | THIS_SCRIPT="$(readlink -f "$0" 2>/dev/null || echo "$0")" 21 | THIS_DIR="$(cd "$(dirname "$THIS_SCRIPT")" 2>/dev/null && pwd -P)" 22 | fi 23 | 24 | # Allow RTG_MEM/RTG_JAVA_OPTS to be set before subcommand name 25 | while [ "${1:0:4}" == "RTG_" ]; do 26 | if [ "${1:0:8}" == "RTG_MEM=" ]; then 27 | RTG_MEM=${1:8} 28 | elif [ "${1:0:14}" == "RTG_JAVA_OPTS=" ]; then 29 | RTG_JAVA_OPTS=${1:14} 30 | else 31 | echo "Error: expected RTG_MEM=NNg or RTG_JAVA_OPTS=\"\", e.g.:" 32 | echo " $0 RTG_MEM=16g help" 33 | exit 1 34 | fi 35 | shift 36 | done 37 | 38 | # First stash settings from environment that the user may wish to change on a per-run basis (they need to override other configuration sources below) 39 | USER_RTG_MEM=$RTG_MEM 40 | USER_JAVA_OPTS=$RTG_JAVA_OPTS 41 | 42 | if [ ! -r "$THIS_DIR/rtg.cfg" ] ; then 43 | shopt -s nocasematch 44 | # Perform initial setup 45 | 46 | if [ -r "$THIS_DIR/LICENSE.txt" ] && ! head -n 1 "$THIS_DIR/LICENSE.txt" | grep -q BSD; then 47 | # If this distribution includes an EULA that requires display and agreement to it 48 | more "$THIS_DIR/LICENSE.txt" 49 | 50 | if [ -r "$THIS_DIR/rtg-license.txt" ]; then 51 | echo 52 | echo "This software has been prepared for $(awk -F= '/^!person=/{print $2}'<$THIS_DIR/rtg-license.txt)" 53 | fi 54 | echo 55 | read -p "Do you agree to the terms and conditions (y/n)? 
" EULA_REPLY 56 | echo 57 | [[ "$EULA_REPLY" == "y" ]] || [[ "$EULA_REPLY" == "yes" ]] || { 58 | echo "You must agree with the license terms before you can use the software." 59 | exit 1 60 | } 61 | fi 62 | 63 | # Default crash reporting to true. The user can alter this in the rtg.cfg if they wish. 64 | RTG_TALKBACK=true 65 | TEST_TALKBACK=y 66 | 67 | # Find out if the user would like to submit usage information. 68 | cat <"$THIS_DIR/rtg.cfg" 87 | # Configuration file for RTG. 88 | 89 | # This file is automatically created upon first run, and may be edited 90 | # to allow some customization of default behaviour. In a 91 | # multi-machine environment, this file can be used to achieve 92 | # machine-specific configuration either by placing in /etc/rtg.cfg on 93 | # each machine or in the rtg installation directory values with the 94 | # name rtg.\$(hostname -s).cfg, e.g. rtg.$(hostname -s).cfg 95 | 96 | 97 | # The path to the java executable (need not be full path if in \$PATH). 98 | # If unset, RTG will use the bundled JRE if present, otherwise 99 | # will expect java to be on \$PATH 100 | # RTG_JAVA="java" 101 | 102 | # Amount of memory to allocate to RTG. Use G suffix for gigabytes. 103 | # If unset, allow RTG to use up to 90% of available RAM (see RTG_MEM_PCT) 104 | # RTG_MEM="4G" 105 | # Note that memory allocation can also be overridden on a per-command basis, e.g: 106 | # \$ rtg RTG_MEM=24G population ... 107 | 108 | # If RTG_MEM is unset, use this percentage of total RAM. 109 | # The default is 90 percent. 110 | # RTG_MEM_PCT=90 111 | 112 | 113 | # Attempt to send crash logs to Real Time Genomics, true to enable, false to disable 114 | RTG_TALKBACK=$RTG_TALKBACK 115 | 116 | # Enable simple usage logging, true to enable. Default is no logging. 117 | RTG_USAGE=$RTG_USAGE 118 | 119 | # Server URL when usage logging to a server. Default is to use RTG hosted server. 
120 | # RTG_USAGE_HOST= 121 | 122 | # If performing single-user file-based usage logging, this specifies the directory to log to. 123 | # RTG_USAGE_DIR= 124 | 125 | # List of optional fields to add to usage logging (when enabled). 126 | # If unset do not add any of these fields. (commandline may contain information 127 | # considered sensitive) 128 | # RTG_USAGE_OPTIONAL=username,hostname,commandline 129 | RTG_USAGE_OPTIONAL=username,hostname 130 | 131 | 132 | # Allows specification of the HTTP proxy to use for 133 | # talkback/usage, specified in host:port form. 134 | # If unset, assume no http proxy is required. 135 | # RTG_PROXY=http://my.proxy.host:3128/ 136 | 137 | 138 | # Directory in which to look for pipeline reference datasets. 139 | # If unset, uses the references subdirectory of this installation. 140 | # RTG_REFERENCES_DIR= 141 | 142 | # Directory in which to look for AVR models. 143 | # If unset, uses the models subdirectory of this installation. 144 | # RTG_MODELS_DIR= 145 | 146 | # Allows passing additional arguments passed to the JVM. e.g: 147 | # RTG_JAVA_OPTIONS="-Djava.io.tmpdir=XXYY -XX:+UseLargePages" 148 | 149 | 150 | # Set the number of threads to use when not otherwise specified via command line flags. 151 | # The default behavior is to allocate one thread per machine core. 152 | # RTG_DEFAULT_THREADS=1 153 | 154 | 155 | EOF 156 | echo 157 | echo "Initial configuration complete. 
Advanced user configuration is" 158 | echo "available by editing settings in rtg.cfg" 159 | echo 160 | fi 161 | 162 | # Set other defaults 163 | if [ -x "$THIS_DIR/jre/bin/java" ]; then 164 | RTG_JAVA="$THIS_DIR/jre/bin/java" # Path to java (for JRE bundled versions) 165 | else 166 | RTG_JAVA="java" # Path to java (for no-JRE versions assume java is on current PATH) 167 | fi 168 | RTG_JAR="$THIS_DIR/RTG.jar" # Path to RTG.jar (default assumes jarfile in directory of this script) 169 | RTG_JAVA_OPTS= # Additional JVM options (e.g.: "-Djava.io.tmpdir=XXYY -XX:+UseLargePages") 170 | RTG_MEM= # Maximum memory for rtg to use (e.g. 48g) 171 | RTG_MEM_PCT=90 # If RTG_MEM is not defined use this percentage of total RAM 172 | 173 | # Read in default config (primarily containing talkback/usage prefs) 174 | if [ ! -r "$THIS_DIR/rtg.cfg" ] ; then 175 | echo "No initial configuration." 176 | exit 1 177 | fi 178 | source "$THIS_DIR/rtg.cfg" || exit 1 179 | 180 | # Read in machine specific customizations installed in system location 181 | if [ -r /etc/rtg.cfg ] ; then 182 | source /etc/rtg.cfg || exit 1 183 | fi 184 | 185 | # Read in machine specific customizations installed in local directory (without requiring sysadmin setup) 186 | hostname=$(hostname -s) 187 | if [ "${hostname}" ] && [ -r "$THIS_DIR/rtg.${hostname}.cfg" ] ; then 188 | source "$THIS_DIR/rtg.${hostname}.cfg" || exit 1 189 | fi 190 | 191 | # Apply settings from user environment 192 | RTG_MEM=${USER_RTG_MEM:-$RTG_MEM} 193 | RTG_JAVA_OPTS=${USER_JAVA_OPTS:-$RTG_JAVA_OPTS} 194 | 195 | # Check that the mandatory config is present 196 | if [ -z "$RTG_JAVA" ] ; then 197 | echo "RTG_JAVA must be specified in /etc/rtg.cfg or $THIS_DIR/rtg.${hostname}.cfg" 198 | exit 1 199 | fi 200 | if [ -z "$RTG_JAR" ] ; then 201 | echo "RTG_JAR must be specified in /etc/rtg.cfg or $THIS_DIR/rtg.${hostname}.cfg" 202 | exit 1 203 | fi 204 | 205 | # Check that the config is suitable 206 | java_path=$(which "$RTG_JAVA" 2> /dev/null) 207 | 
if [ -z "$java_path" -o ! -x "$java_path" ] ; then
    echo "Unable to execute $RTG_JAVA"
    exit 1
fi

RTG_MIN_MEM=-Xmx64m     # Minimal memory just used for java version checking, log sending etc.

# Work out the Java feature release from the quoted string in the
# `java -version` banner.  Two schemes exist: the legacy "1.N.x" form
# (e.g. "1.7.0_80" is Java 7) and the newer "N[.x.y]" form (e.g. "9.0.4",
# "11.0.2", "17").
version_string=$("$RTG_JAVA" $RTG_MIN_MEM -version 2>&1 | sed -n 's/^.* version "\([^"]*\)".*/\1/p' | head -n 1)
version=${version_string%%[^0-9]*}      # leading run of digits
if [ "$version" == "1" ]; then
    # Legacy scheme: the feature release is the number after "1."
    version=${version_string#1.}
    version=${version%%[^0-9]*}
fi
if [ ! "$version" ] || [ ! "$version" -ge 7 ]; then
    echo "$RTG_JAVA is not Java 7 or later"
    exit 1
fi

if [ ! -r "$RTG_JAR" ] ; then
    echo "Unable to read $RTG_JAR"
    exit 1
fi

if [ ! -z "$RTG_PROXY" ] ; then
    # Expected form is host[:port].  A URL-style value such as
    # http://host:port/ is tolerated by dropping the scheme and everything
    # from the first slash after the host.
    proxy_spec=${RTG_PROXY#*://}
    proxy_spec=${proxy_spec%%/*}
    proxy_host=${proxy_spec%%:*}
    proxy_port=
    if [[ "$proxy_spec" == *:* ]]; then
        proxy_port=${proxy_spec#*:}
        proxy_port=${proxy_port%%:*}
    fi
    if [ -z "$proxy_port" ] ; then
        proxy_port=80
    fi
    RTG_PROXY="-Dhttp.proxyHost=$proxy_host -Dhttp.proxyPort=$proxy_port"
fi

# From here on the RTG_* settings are rewritten in place into the JVM
# -D options that carry them.
if [ ! -z "$RTG_USAGE" ]; then
    RTG_USAGE="-Dusage=$RTG_USAGE"
    if [ ! -z "$RTG_USAGE_HOST" ]; then
        RTG_USAGE="$RTG_USAGE -Dusage.host=$RTG_USAGE_HOST"
    fi
    if [ ! -z "$RTG_USAGE_DIR" ]; then
        RTG_USAGE="$RTG_USAGE -Dusage.dir=$RTG_USAGE_DIR"
    fi
    if [[ "$RTG_USAGE_OPTIONAL" == *username* ]]; then
        RTG_USAGE="$RTG_USAGE -Dusage.log.username=true"
    fi
    if [[ "$RTG_USAGE_OPTIONAL" == *hostname* ]]; then
        RTG_USAGE="$RTG_USAGE -Dusage.log.hostname=true"
    fi
    if [[ "$RTG_USAGE_OPTIONAL" == *commandline* ]]; then
        RTG_USAGE="$RTG_USAGE -Dusage.log.commandline=true"
    fi
fi

if [ ! -z "$RTG_TALKBACK" ]; then
    RTG_TALKBACK="-Dtalkback=$RTG_TALKBACK"
    if [ "$TEST_TALKBACK" == "y" ]; then    # First run only, perform test of crash reporting
        if ! "$RTG_JAVA" $RTG_USAGE $RTG_TALKBACK $RTG_PROXY $RTG_MIN_MEM -cp "$RTG_JAR" com.rtg.util.diagnostic.SimpleTalkback "Post-install talkback test"; then
            echo "Initial crash-report connectivity test did not succeed, probably due to firewall issues."
            echo "You will be asked to manually submit any error logs."
        fi
        echo
    fi
fi

# No explicit memory setting: ask the jar's ChooseMemory helper, passing it
# the configured percentage.
if [ -z "$RTG_MEM" ] ; then
    RTG_MEM=$("$RTG_JAVA" $RTG_MIN_MEM -cp "$RTG_JAR" com.rtg.util.ChooseMemory $RTG_MEM_PCT)
fi

if [ ! -z "$RTG_REFERENCES_DIR" ]; then
    RTG_REFERENCES_DIR="-Dreferences.dir=$RTG_REFERENCES_DIR"
else
    RTG_REFERENCES_DIR="-Dreferences.dir=$THIS_DIR/references"
fi
if [ ! -z "$RTG_MODELS_DIR" ]; then
    RTG_MODELS_DIR="-Dmodels.dir=$RTG_MODELS_DIR"
else
    RTG_MODELS_DIR="-Dmodels.dir=$THIS_DIR/models"
fi

if [ ! -z "$RTG_DEFAULT_THREADS" ] ; then
    RTG_DEFAULT_THREADS="-Druntime.defaultThreads=$RTG_DEFAULT_THREADS"
fi

# Now run the primary RTG command
HSLOG=./hs_err_pid$$.log
"$RTG_JAVA" -Djava.library.path="$THIS_DIR" -XX:ErrorFile=$HSLOG $RTG_JAVA_OPTS "$RTG_REFERENCES_DIR" "$RTG_MODELS_DIR" $RTG_USAGE $RTG_TALKBACK $RTG_DEFAULT_THREADS -Xmx$RTG_MEM $RTG_PROXY -jar "$RTG_JAR" "$@"
ECODE=$?
# If the JVM left a fatal-error log, either explain the out-of-memory case or
# hand the log to the talkback class; the original exit code is kept.
if [ -f "$HSLOG" ]; then
    if grep -q "insufficient memory" "$HSLOG"; then
        echo "The operating system did not make requested memory available to the JVM. Try removing other jobs on this machine, adjusting allocated memory appropriate to currently available memory, or adjusting command parameters to reduce memory requirements. More information is contained in the file: $HSLOG" >&2
    else
        "$RTG_JAVA" $RTG_TALKBACK $RTG_PROXY $RTG_MIN_MEM -cp "$RTG_JAR" com.rtg.util.diagnostic.SimpleTalkback "$HSLOG" "$@"
    fi
fi
exit $ECODE

--------------------------------------------------------------------------------