├── king
    ├── LongHash.cpp
    ├── MemoryInfo.h
    ├── MapFunction.h
    ├── TraitTransformations.h
    ├── WindowsHelper.h
    ├── Hash.h
    ├── Error.h
    ├── LongLongCounter.h
    ├── Intervals.h
    ├── KinshipX.h
    ├── Kinship.h
    ├── MathDeriv.h
    ├── Sort.h
    ├── MemoryAllocators.h
    ├── Davies.h
    ├── MemoryInfo.cpp
    ├── Input.h
    ├── MapFunction.cpp
    ├── MathSobol.h
    ├── Matings.h
    ├── diseaseGEE.h
    ├── LongLongCounter.cpp
    ├── QuickIndex.h
    ├── Error.cpp
    ├── WindowsHelper.cpp
    ├── MerlinSort.h
    ├── OLS.h
    ├── Genetics.h
    ├── rplot.h
    ├── PedigreeAlleleFreq.h
    ├── MathLu.h
    ├── KinshipX.cpp
    ├── PeelerNodes.h
    ├── OptimizerConstraints.h
    ├── MathConstant.h
    ├── MathMiser.h
    ├── Constant.h
    ├── GenotypeLists.h
    ├── MathSVD.h
    ├── MathGold.h
    ├── MathCholesky.h
    ├── TDT.h
    ├── GenotypeCompressor.h
    ├── InputFile.cpp
    ├── Matings.cpp
    ├── MathVegas.h
    ├── MathAssoc.h
    ├── LongArray.h
    ├── MathDeriv.cpp
    ├── BasicHash.h
    ├── VCLinear.h
    ├── Genetics.cpp
    ├── Kinship.cpp
    ├── MathStats.h
    ├── PedigreeFamily.h
    ├── PedigreeDescription.h
    ├── VCGEE.h
    ├── IBD.h
    ├── MathSobol.cpp
    ├── PeelerNodes.cpp
    ├── TraitTransformations.cpp
    ├── LongArray.cpp
    ├── FortranFormat.h
    ├── PedigreePerson.h
    ├── MiniDeflate.h
    ├── MemoryAllocators.cpp
    ├── Input.cpp
    ├── MathCholesky.cpp
    ├── MerlinSort.cpp
    ├── BasicHash.cpp
    ├── StringArray.h
    ├── InputFile.h
    ├── StringMap.h
    ├── Hash.cpp
    ├── IntArray.h
    ├── Random.h
    ├── MathLu.cpp
    ├── MathFloatVector.h
    ├── PedigreeAlleles.h
    ├── OptimizerConstraints.cpp
    ├── BrentC.cpp
    ├── MathGenMin.h
    └── MathNormal.h
├── libsvm
    ├── .gitignore
    ├── svm.def
    ├── Makefile
    ├── COPYRIGHT
    ├── tools
    │   ├── easy.py
    │   ├── checkdata.py
    │   └── subset.py
    └── svm.h
├── examples
    └── index
    │   ├── seq.batches.by.20.txt
    │   ├── README
    │   └── list.107.local.crams.index
├── topmed_variant_calling_overview.png
├── .gitignore
├── scripts
    ├── run-merge-sites-local.cmd
    ├── run-milk-local.cmd
    ├── run-union-sites-local.cmd
    ├── run-batch-genotype-local.cmd
    ├── d13-add-fmis-to-frz9.pl
    ├── run-discovery-local.cmd
    ├── run-paste-genotype-local.cmd
    ├── e05-whitelist-gwas-variants.pl
    └── e04-filter-vars.pl
├── .gitmodules
├── config.yml
├── Dockerfile
└── singularity.def


/king/LongHash.cpp:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 


--------------------------------------------------------------------------------
/libsvm/.gitignore:
--------------------------------------------------------------------------------
1 | svm-predict
2 | svm-scale
3 | svm-train
4 | svm.o
5 | 


--------------------------------------------------------------------------------
/examples/index/seq.batches.by.20.txt:
--------------------------------------------------------------------------------
1 | 1
2 | 21
3 | 41
4 | 61
5 | 81
6 | 101
7 | 


--------------------------------------------------------------------------------
/topmed_variant_calling_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/statgen/topmed_variant_calling/HEAD/topmed_variant_calling_overview.png


--------------------------------------------------------------------------------
/king/MemoryInfo.h:
--------------------------------------------------------------------------------
1 | #ifndef __MEMORYINFO_H__
2 | #define __MEMORYINFO_H__
3 | 
4 | #include "StringBasics.h"
5 | 
// Formats a byte count as a short human-readable size string with one
// decimal place and a "kb", "mb", "gb" or "tb" suffix ("<1.0 kb" for
// anything below 1024 bytes).
//
// The returned reference points at a single String that is reused by every
// call (see MemoryInfo.cpp), so the text is overwritten by the next call;
// copy it if it has to be kept.
String & MemoryInfo(double bytes);
7 | 
8 | #endif
9 | 


--------------------------------------------------------------------------------
/king/MapFunction.h:
--------------------------------------------------------------------------------
1 | #ifndef __MAPFUNCTION_H__
2 | #define __MAPFUNCTION_H__
3 | 
// Converts a map distance into a recombination fraction using
// (1 - exp(-2 * distance)) / 2.
// NOTE(review): distance is presumably expressed in Morgans -- confirm
// against callers.
double DistanceToRecombination(double distance);
// Inverse conversion: -0.5 * log(1 - 2 * recombination). The argument of the
// logarithm is kept at or above 1e-7, so recombination fractions at or above
// 0.5 yield a large finite distance instead of a domain error.
double RecombinationToDistance(double recombination);
6 | 
7 | #endif
8 | 


--------------------------------------------------------------------------------
/king/TraitTransformations.h:
--------------------------------------------------------------------------------
 1 | #ifndef __TRAIT_TRANSFORMS__
 2 | #define __TRAIT_TRANSFORMS__
 3 | 
 4 | #include "Pedigree.h"
 5 | 
// Inverse normal transformation of trait values stored in a pedigree.
// NOTE(review): the one-argument overload presumably transforms every trait
// and the two-argument overload only the trait with the given index; the
// implementation is not visible here -- confirm in TraitTransformations.cpp.
void InverseNormalTransform(Pedigree & ped);
void InverseNormalTransform(Pedigree & ped, int trait);
 8 | 
 9 | #endif
10 | 
11 |  
12 | 
13 | 


--------------------------------------------------------------------------------
/king/WindowsHelper.h:
--------------------------------------------------------------------------------
 1 | #ifndef __WINDOWSHELPER_H__
 2 | #define __WINDOWSHELPER_H__
 3 | 
// Expansion of wildcard command-line arguments.
//
// When __WIN32__ is not defined this is an empty inline stub that leaves the
// arguments untouched. When __WIN32__ is defined the function takes argc and
// argv by reference so that it can replace them with the expanded list.
//
// NOTE(review): WindowsHelper.cpp only provides the definition when __GNUC__
// is not defined, so a __WIN32__ build with a GNU compiler sees this
// declaration without a matching definition in that file -- confirm.
#ifndef __WIN32__
inline void WildCardArguments(int argc, char ** argv) { }
#else
void WildCardArguments(int & argc, char ** & argv);
#endif
 9 | 
10 | #endif
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/king/Hash.h:
--------------------------------------------------------------------------------
 1 | #ifndef __HASH_H__
 2 | #define __HASH_H__
 3 | 
// Computes an unsigned hash value over the first `length` bytes of `key`.
// NOTE(review): `initval` is presumably a seed / previous hash value used to
// chain calls -- confirm in Hash.cpp.
unsigned int hash ( const unsigned char * key, unsigned int length, unsigned int initval);

// Same signature as hash(); judging by the name it ignores character case,
// but the implementation is not visible here -- confirm in Hash.cpp.
unsigned int hash_no_case ( const unsigned char * key, unsigned int length, unsigned int initval);
 7 | 
 8 | #endif
 9 | 
10 | 


--------------------------------------------------------------------------------
/king/Error.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ERROR_H_
 2 | #define _ERROR_H_
 3 | 
 4 | // #ifdef __cplusplus
 5 | // extern "C" {
 6 | // #endif
 7 | 
// printf-style reporting helpers; `msg` is a format string followed by its
// arguments. All three write to standard output (see Error.cpp).

// Prints a "FATAL ERROR" banner and the message, then terminates the program
// with EXIT_FAILURE. Does not return.
void error(const char * msg, ...);
// Prints a "WARNING" banner and the message, then returns to the caller.
void warning(const char * msg, ...);
// Prints a "FATAL NUMERIC ERROR" banner and the message, then terminates the
// program with EXIT_FAILURE. Does not return.
void numerror(const char * msg, ...);
11 | 
12 | // #ifdef __cplusplus
13 | //   };
14 | // #endif
15 | 
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/king/LongLongCounter.h:
--------------------------------------------------------------------------------
 1 | #ifndef __LONGLONGCOUNTER_H_
 2 | #define __LONGLONGCOUNTER_H_
 3 | 
 4 | #include "LongHash.h"
 5 | 
// Signed counter keyed by long long values, stored in a LongHash<int>.
// Keys whose count reaches zero are removed from the table, so an absent key
// is equivalent to a count of zero (see LongLongCounter.cpp).
class LongCounter : public LongHash<int>
   {
   public:
      // Creates an empty counter; duplicate keys are disabled.
      LongCounter();

      // Adds one to the count for key (a missing key starts at 1; a count of
      // -1 is removed instead of being stored as 0).
      void IncrementCount(long long key);
      // Subtracts one from the count for key (a missing key starts at -1; a
      // count of 1 is removed instead of being stored as 0).
      void DecrementCount(long long key);
      // Returns the count for key, or 0 when the key is not in the table.
      int  GetCount(long long key);
   };
15 | 
16 | #endif
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/king/Intervals.h:
--------------------------------------------------------------------------------
 1 | #ifndef __Intervals_h__
 2 | #define __Intervals_h__
 3 | #include "IntArray.h"
 4 | 
// Helpers operating on segment lists held in IntArrays.
// NOTE(review): the implementations are not visible here. The segment arrays
// presumably hold start/stop marker indices and positionBP the base-pair
// position of each marker -- confirm in Intervals.cpp before relying on it.

double RoRP(IntArray &RP1, IntArray &RP2, IntArray &R1R2, IntArray &positionBP);
// Presumably the total length of the segments in `segs`, measured through
// positionBP -- TODO confirm units.
double SegmentLength(IntArray &segs, IntArray &positionBP);
double JoinLength(IntArray &A, IntArray &B, IntArray &positionBP);
// C is an output parameter; by name it receives the intersection of the
// segment lists A and B -- TODO confirm.
void SegmentIntersect(IntArray &A, IntArray &B, IntArray &C);
// C is an output parameter; by name it receives the union of the segment
// lists A and B -- TODO confirm.
void SegmentUnion(IntArray &A, IntArray &B, IntArray &C);
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/king/KinshipX.h:
--------------------------------------------------------------------------------
 1 | #ifndef __KINSHIPX_H__
 2 | #define __KINSHIPX_H__
 3 | 
 4 | #include "Pedigree.h"
 5 | #include "MathMatrix.h"
 6 | 
// Pairwise values for the members of one family, queried with operator().
// NOTE(review): by its name this is the X-chromosome counterpart of the
// Kinship class -- confirm in KinshipX.cpp.
class KinshipX
   {
   public:
      // Matrix of values for all pairs of individuals; filled outside this
      // header (presumably by Setup).
      Matrix    allPairs;
      // Family the matrix refers to; NULL until one is attached.
      // NOTE(review): looks like a non-owning pointer -- confirm.
      Family *  fam;

      // Starts with an empty matrix and no family attached.
      KinshipX() : allPairs()
         { fam = NULL; }

      // Prepares the object for family f.
      void Setup(Family & f);

      // Returns the value for the pair (p1, p2).
      double operator () (Person & p1, Person & p2);

   };
21 | 
22 | #endif
23 | 
24 | 


--------------------------------------------------------------------------------
/king/Kinship.h:
--------------------------------------------------------------------------------
 1 | #ifndef __KINSHIP_H__
 2 | #define __KINSHIP_H__
 3 | 
 4 | #include "Pedigree.h"
 5 | #include "MathMatrix.h"
 6 | 
// Pairwise kinship values for the members of one family, queried with
// operator().
class Kinship
   {
   public:
      // Matrix of values for all pairs of individuals; filled outside this
      // header (presumably by Setup).
      Matrix    allPairs;
      // Family the matrix refers to; NULL until one is attached.
      // NOTE(review): looks like a non-owning pointer -- confirm.
      Family *  fam;

      // Starts with an empty matrix and no family attached.
      Kinship() : allPairs()
         { fam = NULL; }

      // Prepares the object for family f.
      void Setup(Family & f);

      // Reports whether the current family shows inbreeding.
      // NOTE(review): the exact criterion lives in Kinship.cpp -- confirm.
      bool isInbred();

      // Returns the value for the pair (p1, p2).
      double operator () (Person & p1, Person & p2);

   };
23 | 
24 | #endif
25 | 
26 | 


--------------------------------------------------------------------------------
/libsvm/svm.def:
--------------------------------------------------------------------------------
 1 | LIBRARY libsvm
 2 | EXPORTS
 3 | 	svm_train	@1
 4 | 	svm_cross_validation	@2
 5 | 	svm_save_model	@3
 6 | 	svm_load_model	@4
 7 | 	svm_get_svm_type	@5
 8 | 	svm_get_nr_class	@6
 9 | 	svm_get_labels	@7
10 | 	svm_get_svr_probability	@8
11 | 	svm_predict_values	@9
12 | 	svm_predict	@10
13 | 	svm_predict_probability	@11
14 | 	svm_free_model_content	@12
15 | 	svm_free_and_destroy_model	@13
16 | 	svm_destroy_param	@14
17 | 	svm_check_parameter	@15
18 | 	svm_check_probability_model	@16
19 | 	svm_set_print_string_function	@17
20 | 


--------------------------------------------------------------------------------
/king/MathDeriv.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATHDERIV_H__
 2 | #define __MATHDERIV_H__
 3 | 
 4 | #include "MathVector.h"
 5 | 
// Evaluates the derivative of function func() at x, using h as the initial
// guess for the step size. An estimate of the error in the derivative is
// stored in err (output parameter).

double dfunction(double (* func)(double), double x, double h, double & err);

// Same as above, but without reporting the error estimate to the caller.
//

double dfunction(double (* func)(double), double x, double h);
15 | 
16 | #endif
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/examples/index/README:
--------------------------------------------------------------------------------
1 | This directory contains the input files for the example TOPMed variant calling pipeline.
2 | 
3 | The list.107.local.crams.index file lists 107 public samples (part of the 1000 Genomes Project) sequenced for the TOPMed project. The samples can be downloaded from a Google Cloud Storage bucket. (The location of the bucket will be added here; contact hmkang@umich.edu in the meantime.)
4 | 
5 | The resource files are also required to run the example variant calling procedure. They should be downloadable from ftp://share.sph.umich.edu/1000genomes/fullProject/hg38_resources/


--------------------------------------------------------------------------------
/king/Sort.h:
--------------------------------------------------------------------------------
 1 | #ifndef __SORT_H__
 2 | #define __SORT_H__
 3 | 
 4 | #include "Constant.h"
 5 | 
 6 | #include <stddef.h>
 7 | 
// Sorts an array of `nelem` elements of `width` bytes each, starting at
// `base`, using `cmp` to compare two elements (same parameter layout as the
// C library qsort).
void QuickSort(void *base, size_t nelem, size_t width,
               int (*cmp)(const void *, const void *));

// Variant that takes a second array `base2`.
// NOTE(review): presumably base2 is permuted in step with base so both stay
// aligned -- confirm in Sort.cpp.
void QuickSort2(void *base, void * base2, size_t nelem, size_t width,
               int (*cmp)(const void *, const void *));

// Looks up `key` in an array of `nelem` elements of `width` bytes (same
// parameter layout as the C library bsearch).
// NOTE(review): presumably requires the array to be sorted according to
// `cmp` and returns NULL when nothing matches -- confirm in Sort.cpp.
void * BinarySearch(const void *key, const void *base,
               size_t nelem, size_t width,
               int (*cmp)(const void *, const void *));
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/king/MemoryAllocators.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MEMORY_ALLOCATORS_H__
 2 | #define __MEMORY_ALLOCATORS_H__
 3 | 
// Allocation helpers for two- and three-dimensional arrays built from
// pointers to rows. Each Allocate* function is paired with a Free* function
// that takes the pointer by reference.
// NOTE(review): the Free* functions presumably reset the caller's pointer to
// NULL and the Allocate* functions presumably return NULL on failure --
// confirm in MemoryAllocators.cpp.

char **  AllocateCharMatrix(int rows, int cols);
void     FreeCharMatrix(char ** & matrix, int rows);

float ** AllocateFloatMatrix(int rows, int cols);
void     FreeFloatMatrix(float ** & matrix, int rows);

int  **  AllocateIntMatrix(int rows, int cols);
void     FreeIntMatrix(int ** & matrix, int rows);

// Cube variant: `n` matrices of `rows` x `cols` characters each.
char *** AllocateCharCube(int n, int rows, int cols);
void     FreeCharCube(char *** & matrix, int n, int rows);
15 | 
16 | #endif
17 | 
18 | 


--------------------------------------------------------------------------------
/king/Davies.h:
--------------------------------------------------------------------------------
 1 | #ifndef __DAVIES_h__
 2 | #define __DAVIES_h__
 3 | //Davies R.B., Algorithm AS 155: The Distribution of a Linear Combination of chi-2 Random Variables,
 4 | //Journal of the Royal Statistical Society. Series C (Applied Statistics), 29(3), p. 323-333, (1980)
 5 | //void  Davies(double* lb1, double* nc1, int* n1, int *r1, double *sigma, double *c1, int *lim1, double *acc, double* trace, int* ifault, double *res);
 6 | #include <stdio.h>
// Wrapper around Davies' algorithm AS 155 (see the reference above).
// Only c1, lb1 and r1 are required; the remaining parameters default to
// n1 = NULL, nc1 = NULL, sigma = 0, lim1 = 10000 and acc = 0.0001.
// NOTE(review): the parameter names follow the commented-out original
// signature above; presumably c1 is the point at which the distribution is
// evaluated, lb1 the r1 coefficients, n1 the degrees of freedom, nc1 the
// non-centrality parameters, lim1 the iteration limit and acc the requested
// accuracy -- confirm in the implementation.
double Davies(double c1, double* lb1, int r1, int *n1=NULL, double *nc1=NULL, double sigma=0, int lim1=10000, double acc=0.0001);
 8 | 
 9 | #endif
10 | 
11 | 


--------------------------------------------------------------------------------
/libsvm/Makefile:
--------------------------------------------------------------------------------
 1 | CXX ?= g++
 2 | CFLAGS = -Wall -Wconversion -O3 -fPIC
 3 | SHVER = 2
 4 | 
 5 | all: svm-train svm-predict svm-scale
 6 | 
 7 | lib: svm.o
 8 | 	$(CXX) -shared -dynamiclib svm.o -o libsvm.so.$(SHVER)
 9 | 
10 | svm-predict: svm-predict.c svm.o
11 | 	$(CXX) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm
12 | svm-train: svm-train.c svm.o
13 | 	$(CXX) $(CFLAGS) svm-train.c svm.o -o svm-train -lm
14 | svm-scale: svm-scale.c
15 | 	$(CXX) $(CFLAGS) svm-scale.c -o svm-scale
16 | svm.o: svm.cpp svm.h
17 | 	$(CXX) $(CFLAGS) -c svm.cpp
18 | clean:
19 | 	rm -f *~ svm.o svm-train svm-predict svm-scale libsvm.so.$(SHVER)
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | 
34 | htslib/tabix
35 | htslib/bgzip
36 | htslib/htsfile
37 | samtools/samtools
38 | bcftools/bcftools
39 | cramore/cramore
40 | vt-topmed/vt
41 | king/king
42 | king/*.o
43 | *.o
44 | libsvm/svm-train
45 | libsvm/svm-predict
46 | libsvm/svm-scale
47 | 


--------------------------------------------------------------------------------
/king/MemoryInfo.cpp:
--------------------------------------------------------------------------------
 1 | #include "MemoryInfo.h"
 2 | 
 3 | String & MemoryInfo(double bytes)
 4 |    {
 5 |    static String info;
 6 | 
 7 |    if (bytes < 1024)
 8 |       return info = "<1.0 kb";
 9 | 
10 |    if (bytes < 1024. * 1024.)
11 |       info.printf("%.1f kb", (bytes + 1023) / 1024.);
12 |    else if (bytes < 1024. * 1024. * 1024.)
13 |       info.printf("%.1f mb", (bytes + 1024. * 1024. - 1) / (1024. * 1024.));
14 |    else if (bytes < 1024. * 1024. * 1024. * 1024.)
15 |       info.printf("%.1f gb", bytes / (1024. * 1024. * 1024.));
16 |    else
17 |       info.printf("%.1f tb", bytes / (1024. * 1024. * 1024. * 1024.));
18 | 
19 |    return info;
20 |    }
21 | 


--------------------------------------------------------------------------------
/king/Input.h:
--------------------------------------------------------------------------------
 1 | #ifndef __INPUT_H__
 2 | #define __INPUT_H__
 3 | 
// Prompted input helpers. Each overload takes a prompt string, a reference
// (or buffer) that receives the value, and a default value.
// NOTE(review): presumably the default is used when the user enters nothing
// -- confirm in Input.cpp.
void Input(const char * prompt, int & n, int _default = 0);
void Input(const char * prompt, double & d, double _default = 0.0);
void Input(const char * prompt, char & c, char _default = 'A');
// NOTE(review): the default argument binds a string literal to a non-const
// char *, which C++11 and later reject (compilers typically warn). Changing
// the parameter to const char * would have to be done together with the
// definition in Input.cpp.
void Input(const char * prompt, char * s, char * _default = "");
// The bool overload has no default for _default; callers must supply one.
void Input(const char * prompt, bool & b, bool _default);

// Variants that additionally take a [min, max] range for the value.
// NOTE(review): how out-of-range input is handled is not visible here --
// confirm in Input.cpp.
void InputBounds(const char * prompt, int & n, int  min, int max,
                 int _default = 0);
void InputBounds(const char * prompt, double & d, double min, double max,
                 double _default = 0);

// Defined elsewhere; by name, the column width used when printing prompts.
extern int InputPromptWidth;
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/king/MapFunction.cpp:
--------------------------------------------------------------------------------
 1 | #include "MapFunction.h"
 2 | #include "MathConstant.h"
 3 | 
 4 | #include <math.h>
 5 | 
 6 | double DistanceToRecombination(double distance)
 7 |    {
 8 |    return (1.0 - exp(-2.0 * distance)) * 0.5;
 9 |    }
10 | 
11 | double RecombinationToDistance(double recombination)
12 |    {
13 |    return (log(max(1.0 - 2 * recombination, 1e-7)) * -0.5);
14 |    }
15 | 
// Kosambi map function: converts a map distance into a recombination
// fraction using
//    theta = 0.5 * (exp(4 * distance) - 1) / (exp(4 * distance) + 1)
double KosambiDistanceToRecombination(double distance)
   {
   const double growth = exp(4.0 * distance);
   const double scaled = 0.5 * (growth - 1.0);

   return scaled / (growth + 1.0);
   }
22 | 
23 | double RecombinationToKosambiDistance(double theta)
24 |    {
25 |    return 0.25 * log((1.0 + 2*theta) / max(1.0 - 2.0*theta, 1e-7));
26 |    }
27 | 


--------------------------------------------------------------------------------
/king/MathSobol.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATH_SOBOL__
 2 | #define __MATH_SOBOL__
 3 | 
 4 | #include "IntArray.h"
 5 | #include "MathVector.h"
 6 | 
// Size of the static polynomial tables declared in SobolSequence
#define POLY_COUNT   36
// Number of bits used by the generator
#define SOBOL_BITS   30
// 1 / 2^SOBOL_BITS.
// NOTE(review): presumably used to scale the integer state into [0, 1) --
// confirm in MathSobol.cpp.
#define SOBOL_FACTOR (1.0 / (1L << SOBOL_BITS))
10 | 
// Generator for points of a Sobol sequence in `dim` dimensions.
// Call Init() to choose the number of dimensions, then Next() once per point.
//
// NOTE(review): the class holds a raw IntArray pointer and declares a
// destructor but no copy constructor or assignment operator; confirm that
// instances are never copied.
class SobolSequence
   {
   public:
      // NOTE(review): presumably one IntArray of direction numbers per
      // dimension, allocated by Init() and released by the destructor.
      IntArray * bits;
      // Integer state of the generator (presumably one entry per dimension).
      IntArray   x;
      // Number of dimensions selected through Init().
      int        dim;
      // NOTE(review): presumably the number of points generated so far.
      long       counter;

      SobolSequence();
      ~SobolSequence();

      // Sets up the generator for the given number of dimensions.
      void     Init(int dimensions);
      // Stores the next point in `point` and returns a Vector reference
      // (presumably `point` itself -- confirm).
      Vector & Next(Vector & point);

   private:
      // Static tables with POLY_COUNT entries each, defined in MathSobol.cpp.
      static int poly_integers[POLY_COUNT];
      static int poly_degrees[POLY_COUNT];
   };
29 | 
30 | #endif
31 | 
32 | 


--------------------------------------------------------------------------------
/scripts/run-merge-sites-local.cmd:
--------------------------------------------------------------------------------
 1 | # out : log/merge
 2 | # list : BATCH : index/seq.batches.by.20.txt
 3 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.10Mb.txt
 4 | # var : ROOT : ..
 5 | # var : PREFIX : out/union/$BATCH$1$/b$BATCH$1$.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$
 6 | # target : $PREFIX$.merged.sites.bcf $PREFIX$.merged.sites.bcf.csi
 7 | # name: example-merge
 8 | mkdir -p out/union/$BATCH$1$/
 9 | cut -f 3 out/index/list.107.local.crams.vb_xy.index | tail -n +2 | tail -n +$BATCH$1$ | head -n 20 > $PREFIX$.bcflist.txt
10 | $ROOT$/cramore/cramore vcf-merge-candidate-variants --in-vcf-list $PREFIX$.bcflist.txt --region $INTERVAL$1$:$INTERVAL$2$-$INTERVAL$3$ --out-vcf $PREFIX$.merged.sites.bcf > $PREFIX$.merged.sites.bcf.out 2> $PREFIX$.merged.sites.bcf.err
11 | $ROOT$/bcftools/bcftools index $PREFIX$.merged.sites.bcf
12 | 


--------------------------------------------------------------------------------
/king/Matings.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATINGS_H__
 2 | #define __MATINGS_H__
 3 | 
 4 | #include "Pedigree.h"
 5 | 
// Index of the distinct matings (father / mother pairs) within one family.
// Call ListMatings() first, then LookupMating() to find the mating an
// individual belongs to.
class Matings
   {
   public:
      // Number of distinct matings in the pedigree
      int      matingCount;
      // NOTE(review): presumably the number of founders in the family passed
      // to ListMatings() -- confirm in Matings.cpp.
      int      founders;

      // Map linking each non-founder to a mating
      IntArray matingMap;

      // Index all the matings in a family
      void ListMatings(Family * family);

      // Lookup the mating index for a specific offspring, identified either
      // by serial number or by Person record
      int  LookupMating(int serial);
      int  LookupMating(Person & p);

   private:
      // Internal hash table used to find the mating for a parent pair
      void InitializeHash(int size);
      int  LookupMating(int father, int mother);

      IntArray hash;
      IntArray hashId;
   };
30 | 
31 | 
32 | 
33 | #endif
34 | 
35 | 


--------------------------------------------------------------------------------
/king/diseaseGEE.h:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////
 2 | // diseaseGEE.h
 3 | // Author: Wei-Min Chen
 4 | // March 16, 2005
 5 | 
 6 | #ifndef __diseaseGEE_H__
 7 | #define __diseaseGEE_H__
 8 | 
 9 | #include "Pedigree.h"
10 | #include "IntArray.h"
11 | #include "MathMatrix.h"
12 | #include "MathVector.h"
13 | #include "MathCholesky.h"
14 | #include "VCGEE.h"
15 | 
// GEE model specialised for disease traits; derives from the GEE base class
// declared in VCGEE.h.
// NOTE(review): the class holds raw pointers (diseases, resid) and declares a
// destructor; ownership is decided in the implementation file -- confirm
// before copying instances.
class GEE_DIS: public GEE{
//   void constraint(void){}
   // Private helper taking a single int (presumably a family index -- TODO
   // confirm against the GEE base class).
   void RefreshD(int f);
//   void RefreshOD(int f){}
public:
//   double OR[6];
//   double rho[6]; // correlation between relative pair
   IntArray * diseases;
   // NOTE(review): presumably the index of the disease being analysed.
   int disease;
   IntArray mCovariate;
   // NOTE(review): presumably residuals produced by solve() -- confirm.
   Vector *resid;
   void solve();
   GEE_DIS(Pedigree & pedigree);
   ~GEE_DIS();
   void InitCoef();
   // Intentionally empty in this class.
   void summary(){}
   void print();
};
34 | 
35 | #endif
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/scripts/run-milk-local.cmd:
--------------------------------------------------------------------------------
 1 | # out : log/milk
 2 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.1Mb.txt
 3 | # var : ROOT : ..
 4 | # var : IN_PREFIX : out/genotypes/merged/$INTERVAL$1$/merged.$INTERVAL$1$_$INTERVAL$4$_$INTERVAL$5$
 5 | # var : OUT_PREFIX : out/milk/$INTERVAL$1$/milk.$INTERVAL$1$_$INTERVAL$4$_$INTERVAL$5$
 6 | # name : example-milk
 7 | # target : $OUT_PREFIX$.full.vcf.gz $OUT_PREFIX$.sites.vcf.gz $OUT_PREFIX$.sites.vcf.gz.tbi
 8 | mkdir -p out/milk/$INTERVAL$1$/
 9 | $ROOT$/vt-topmed/vt milk_filter -f out/genotypes/hgdp/merged.autosomes.gtonly.minDP0.hgdp.king.inferred.ped -b $IN_PREFIX$.genotypes.bcf -o $OUT_PREFIX$.full.vcf.gz -g $IN_PREFIX$.sex_map.txt --xLabel chrX --yLabel chrY --mtLabel chrM --xStart 2781479 --xStop 155701383 --af-field AF
10 | zcat $OUT_PREFIX$.full.vcf.gz | cut -f 1-8 | $ROOT$/htslib/bgzip -c > $OUT_PREFIX$.sites.vcf.gz
11 | $ROOT$/htslib/tabix -pvcf $OUT_PREFIX$.sites.vcf.gz
12 | 


--------------------------------------------------------------------------------
/king/LongLongCounter.cpp:
--------------------------------------------------------------------------------
 1 | #include "LongLongCounter.h"
 2 | 
 3 | LongCounter::LongCounter() : LongHash<int>()
 4 |    {
 5 |    SetAllowDuplicateKeys(false);
 6 |    }
 7 | 
 8 | void LongCounter::IncrementCount(long long key)
 9 |    {
10 |    int slot = Find(key);
11 | 
12 |    if (slot == -1)
13 |       Add(key, 1);
14 |    else if (Object(slot) == -1)
15 |       Delete(slot);
16 |    else
17 |       Object(slot)++;
18 |    }
19 | 
20 | void LongCounter::DecrementCount(long long key)
21 |    {
22 |    int slot = Find(key);
23 | 
24 |    if (slot == -1)
25 |       Add(key, -1);
26 |    else if (Object(slot) == 1)
27 |       Delete(slot);
28 |    else
29 |       Object(slot)--;
30 |    }
31 | 
32 | int LongCounter::GetCount(long long key)
33 |    {
34 |    int slot = Find(key);
35 | 
36 |    if (slot == -1)
37 |       return 0;
38 |    else
39 |       return Object(slot)--;
40 |    }
41 | 
42 |    
43 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "apigenome"]
 2 | 	path = apigenome
 3 | 	url = https://github.com/hyunminkang/apigenome
 4 | [submodule "bamUtil"]
 5 | 	path = bamUtil
 6 | 	url = https://github.com/statgen/bamUtil
 7 | [submodule "libStatGen"]
 8 | 	path = libStatGen
 9 | 	url = https://github.com/statgen/libStatGen
10 | [submodule "invNorm"]
11 | 	path = invNorm
12 | 	url = https://github.com/hyunminkang/invNorm
13 | [submodule "cramore"]
14 | 	path = cramore
15 | 	url = https://github.com/hyunminkang/cramore
16 | [submodule "htslib"]
17 | 	path = htslib
18 | 	url = https://github.com/samtools/htslib
19 | [submodule "bcftools"]
20 | 	path = bcftools
21 | 	url = https://github.com/samtools/bcftools
22 | [submodule "samtools"]
23 | 	path = samtools
24 | 	url = https://github.com/samtools/samtools
25 | [submodule "libsvm"]
26 | 	path = libsvm
27 | 	url = https://github.com/cjlin1/libsvm
28 | [submodule "vt-topmed"]
29 | 	path = vt-topmed
30 | 	url = https://github.com/hyunminkang/vt-topmed
31 | 


--------------------------------------------------------------------------------
/king/QuickIndex.h:
--------------------------------------------------------------------------------
 1 | #ifndef __QUICKINDEX_H__
 2 | #define __QUICKINDEX_H__
 3 | 
 4 | #include "MathVector.h"
 5 | #include "StringArray.h"
 6 | #include "IntArray.h"
 7 | #include "StringMap.h"
 8 | 
// An IntArray that is filled by indexing a source container.
// NOTE(review): presumably, after Index(), element i is the position in the
// source of the i-th smallest item, and the source itself is not modified --
// confirm in QuickIndex.cpp.
class QuickIndex : public IntArray
   {
   public:
      QuickIndex();
      // Convenience constructors: build the object and index source_data
      // immediately.
      QuickIndex(const IntArray & source_data)
         { Index(source_data); }
      QuickIndex(const StringArray & source_data)
         { Index(source_data); }
      QuickIndex(const Vector & source_data)
         { Index(source_data); }

      // (Re)build the index for the given source container.
      void Index(const IntArray & source_data);
      void Index(const StringArray & source_data);
      void Index(const Vector & source_data);
      // NOTE(review): by name, indexes a StringIntMap by its stored counts --
      // confirm.
      void IndexCounts(const StringIntMap & source_data);

   private:
      // Type-erased pointer to the container being indexed, together with a
      // tag recording which container type it is.
      // NOTE(review): the pointer is stored, so the source presumably has to
      // outlive the indexing call -- confirm.
      const void * source;
      int    datatype;

      // Comparison and sorting helpers used while building the index.
      bool IsBefore(int i, int j);
      void Sort();
   };
32 | 
33 | #endif
34 | 
35 | 


--------------------------------------------------------------------------------
/king/Error.cpp:
--------------------------------------------------------------------------------
 1 | #include "Error.h"
 2 | 
 3 | #include "stdlib.h"
 4 | #include "stdarg.h"
 5 | #include "stdio.h"
 6 | 
 7 | // Declare a dummy class to ensure that compilers recognize this as C++ code
 8 | class String;
 9 | 
// Reports a fatal error and terminates the program.
//
// msg is a printf-style format string followed by its arguments. The banner
// and the formatted message are written to standard output, after which the
// process exits with EXIT_FAILURE; this function does not return.
void error ( const char * msg, ... )
   {
   va_list  args;

   va_start(args, msg);

   fputs("\nFATAL ERROR - \n", stdout);
   vfprintf(stdout, msg, args);
   fputs("\n\n", stdout);

   va_end(args);

   exit(EXIT_FAILURE);
   }
24 | 
// Reports a non-fatal warning and returns to the caller.
//
// msg is a printf-style format string followed by its arguments. The banner
// (which includes the '\a' alert character) and the formatted message are
// written to standard output.
void warning ( const char * msg, ... )
   {
   va_list  args;

   va_start(args, msg);

   fputs("\n\aWARNING - \n", stdout);
   vfprintf(stdout, msg, args);
   fputs("\n", stdout);

   va_end(args);
   }
37 | 
// Reports a fatal numeric error and terminates the program.
//
// msg is a printf-style format string followed by its arguments. The banner
// and the formatted message are written to standard output on the same line,
// after which the process exits with EXIT_FAILURE; this function does not
// return.
void numerror ( const char * msg , ... )
   {
   va_list  args;

   va_start(args, msg);

   fputs("\nFATAL NUMERIC ERROR - ", stdout);
   vfprintf(stdout, msg, args);
   fputs("\n\n", stdout);

   va_end(args);

   exit(EXIT_FAILURE);
   }
52 | 


--------------------------------------------------------------------------------
/config.yml:
--------------------------------------------------------------------------------
 1 | exe_root: "/topmed_variant_calling"
 2 | input_expression: "input/{sample_id}.cram"
 3 | batch_size: 1000
 4 | region_size: 10000000
 5 | merge_region_size: 100000
 6 | thresholds:
 7 |   vb_depth: 15
 8 |   freemix: 0.1
 9 |   frac_dp10: 0.9
10 | contigs:
11 |   chr1: 248956422
12 |   chr2: 242193529
13 |   chr3: 198295559
14 |   chr4: 190214555
15 |   chr5: 181538259
16 |   chr6: 170805979
17 |   chr7: 159345973
18 |   chr8: 145138636
19 |   chr9: 138394717
20 |   chr10: 133797422
21 |   chr11: 135086622
22 |   chr12: 133275309
23 |   chr13: 114364328
24 |   chr14: 107043718
25 |   chr15: 101991189
26 |   chr16: 90338345
27 |   chr17: 83257441
28 |   chr18: 80373285
29 |   chr19: 58617616
30 |   chr20: 64444167
31 |   chr21: 46709983
32 |   chr22: 50818468
33 |   chrX: 156040895
34 |   chrY: 57227415
35 |   chrM: 16569
36 | autosome_contigs: [chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr21, chr22]
37 | sample_ids:
38 |   - "HG00096"
39 |   - "HG00097"
40 |   - "HG00099"
41 | 


--------------------------------------------------------------------------------
/king/WindowsHelper.cpp:
--------------------------------------------------------------------------------
 1 | #include "WindowsHelper.h"
 2 | #ifdef    __WIN32__
 3 | #ifndef   __GNUC__
 4 | #include <dir.h>
 5 | 
 6 | void WildCardArguments(int & argc, char ** & argv)
 7 |    {
 8 |    if (argc < 2) return;
 9 | 
10 |    int  count = 0;
11 |    for (int i = 1; i < argc; i++)
12 |       {
13 |       struct ffblk blk;
14 | 
15 |       int done = findfirst(argv[i], &blk, 0);
16 |       while(!done)
17 |          {
18 |          done = findnext(&blk);
19 |          count++;
20 |          }
21 |       }
22 | 
23 |    char ** new_argv = new char * [count + 1];
24 |    int     new_argc = 1;
25 | 
26 |    new_argv[0] = argv[0];
27 |    for (int i = 1; i < argc; i++)
28 |       {
29 |       struct ffblk blk;
30 | 
31 |       int done = findfirst(argv[i], &blk, 0);
32 |       while (!done && new_argc <= count)
33 |          {
34 |          new_argv[new_argc++] = strdup(blk.ff_name);
35 |          done = findnext(&blk);
36 |          }
37 |       }
38 | 
39 |    argc = new_argc;
40 |    argv = new_argv;
41 |    }
42 | 
43 | #endif
44 | #endif
45 | 
46 | 


--------------------------------------------------------------------------------
/scripts/run-union-sites-local.cmd:
--------------------------------------------------------------------------------
 1 | # out : log/merge
 2 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.10Mb.txt
 3 | # var : ROOT : ..
 4 | # var : PREFIX : out/union/union.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$
 5 | # target : $PREFIX$.sites.bcf $PREFIX$.sites.bcf.csi
 6 | # name: example-union
 7 | bash -c 'cat index/seq.batches.by.20.txt | xargs -I {} echo out/union/{}/b{}.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$.merged.sites.bcf > $PREFIX$.bcflist.txt'
 8 | $ROOT$/cramore/cramore vcf-merge-candidate-variants --in-vcf-list $PREFIX$.bcflist.txt --region $INTERVAL$1$:$INTERVAL$2$-$INTERVAL$3$ --out-vcf $PREFIX$.merged.sites.bcf > $PREFIX$.merged.sites.bcf.out 2> $PREFIX$.merged.sites.bcf.err
 9 | $ROOT$/bcftools/bcftools index -f $PREFIX$.merged.sites.bcf
10 | bash -c 'set -o pipefail; $ROOT$/vt-topmed/vt annotate_indels -r resources/ref/hs38DH.fa $PREFIX$.merged.sites.bcf -o + 2> $PREFIX$.annotated.sites.bcf.err | $ROOT$/vt-topmed/vt consolidate_variants + -o $PREFIX$.sites.bcf > $PREFIX$.sites.bcf.out 2> $PREFIX$.bcf.sites.err'
11 | $ROOT$/bcftools/bcftools index -f $PREFIX$.sites.bcf
12 | 


--------------------------------------------------------------------------------
/king/MerlinSort.h:
--------------------------------------------------------------------------------
 1 | ////////////////////////////////////////////////////////////////////// 
 2 | // merlin/MerlinSort.h 
 3 | // (c) 2000-2007 Goncalo Abecasis
 4 | // 
 5 | // This file is distributed as part of the MERLIN source code package   
 6 | // and may not be redistributed in any form, without prior written    
 7 | // permission from the author. Permission is granted for you to       
 8 | // modify this file for your own personal use, but modified versions  
 9 | // must retain this copyright notice and must not be distributed.     
10 | // 
11 | // Permission is granted for you to use this file to compile MERLIN.    
12 | // 
13 | // All computer programs have bugs. Use this file at your own risk.   
14 | // 
15 | // Tuesday December 18, 2007
16 | // 
17 |  
18 | #ifndef __MERLINSORT_H__
19 | #define __MERLINSORT_H__
20 | 
21 | #include "Pedigree.h"
22 | 
23 | // This routine sorts families so that densely genotyped individuals
24 | // appear before those with more missing data, which empirically appears
25 | // to reduce the average size of gene flow trees.
26 | //
27 | // ped is taken by non-const reference; presumably it is reordered in place (confirm in MerlinSort.cpp).
28 | void SortFamilies(Pedigree & ped);
29 | 
30 | #endif
31 |   
32 | 


--------------------------------------------------------------------------------
/king/OLS.h:
--------------------------------------------------------------------------------
 1 | #ifndef __OLS_h__
 2 | #define __OLS_h__
 3 | 
 4 | #include "Pedigree.h"
 5 | #include "IntArray.h"
 6 | #include "MathMatrix.h"
 7 | #include "MathVector.h"
 8 | #include "MathCholesky.h"
 9 | // Regression object: the caller fills the public inputs, calls run(), then reads the public outputs (bodies are in the .cpp).
10 | class OLS_REGRESSION{
11 |       Matrix MatrixOne;   // private working storage; its use is not visible in this header
12 |       Matrix L, Linverse; // presumably a Cholesky factor and its inverse (MathCholesky.h is included) -- confirm
13 |       double Q;
14 |       Matrix tMatrix;
15 |       Vector tVector;
16 |    public:
17 |    // Input
18 |       Vector Y;      // presumably the response vector -- confirm
19 |       Matrix X;      // presumably the design matrix -- confirm
20 | 
21 |    // Output
22 |       int N;         // sample size
23 |       int P;         // # covariates
24 |       int testCount;
25 |       StringArray covariateNames;
26 |       int nuisanceCount;
27 |       Vector beta;   // regression coefficient
28 |       Vector SE;     // presumably the standard errors of beta -- confirm
29 |       Matrix Cov;
30 |       double loglik; // log likelihood
31 |       Vector t_statistic;
32 |       Vector pvalue;
33 |       Vector R2;     // r-square: a Cov(X, Y) / Var(Y)
34 | //      Vector R2_alt; // r-square: a^2 Var(X) / Var(Y)
35 |       bool failure;  // presumably set when the fit could not be computed -- confirm
36 | 
37 |       OLS_REGRESSION();
38 |       void run();                   // no arguments: presumably uses the Y and X members above
39 |       void run(Vector y, Matrix X); // NOTE(review): both arguments are passed by value, so each call copies them
40 |       void run(Vector y, Vector X); // same, with a Vector in place of the Matrix
41 |       void Print();
42 |       void Print(const char* title);
43 | };
44 | 
45 | #endif
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/scripts/run-batch-genotype-local.cmd:
--------------------------------------------------------------------------------
 1 | # out : log/batch-geno
 2 | # list : BATCH : index/seq.batches.by.20.txt
 3 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.10Mb.txt
 4 | # var : ROOT : ..
 5 | # var : PREFIX : out/genotypes/batches/$BATCH$1$/b$BATCH$1$.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$
 6 | # name : example-batch-genotype
 7 | # target : $PREFIX$.genotypes.bcf $PREFIX$.genotypes.bcf.csi
 8 | mkdir -p out/genotypes/batches/$BATCH$1$/
 9 | cut -f 1,20 out/index/list.107.local.crams.vb_xy.index | tail -n +2 | tail -n +$BATCH$1$ | head -n 20 > $PREFIX$.sex_map.txt
10 | cut -f 1,2,5 out/index/list.107.local.crams.vb_xy.index | tail -n +2 | tail -n +$BATCH$1$ | head -n 20 > $PREFIX$.cram_index.txt
11 | bash -c 'set -o pipefail; REF_PATH=resources/ref/md5/%2s/%2s/%s $ROOT$/cramore/cramore dense-genotype --in-cram-list $PREFIX$.cram_index.txt --in-vcf out/union/union.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$.sites.bcf --unit 6000000 --region $INTERVAL$1$:$INTERVAL$2$-$INTERVAL$3$ --sex-map $PREFIX$.sex_map.txt --xLabel chrX --yLabel chrY --xStart 2781479 --xStop 155701383 --print-tmp-info --out $PREFIX$.genotypes.bcf --min-mq 1 > $PREFIX$.genotypes.bcf.out 2> $PREFIX$.genotypes.bcf.err'
12 | $ROOT$/bcftools/bcftools index -f $PREFIX$.genotypes.bcf
13 | 


--------------------------------------------------------------------------------
/king/Genetics.h:
--------------------------------------------------------------------------------
 1 | #ifndef  __GENETICS_H__
 2 | #define  __GENETICS_H__
 3 | 
 4 | #include "Parameters.h"
 5 | 
 6 | // Genetic models
 7 | #define  GM_FREE        0
 8 | #define  GM_RECESSIVE   1
 9 | #define  GM_ADDITIVE    2
10 | #define  GM_DOMINANT    3
11 | 
12 | // Constants for imprinting analysis
13 | #define  I_NONE         0
14 | #define  I_PATERNAL     1
15 | #define  I_MATERNAL     2
16 | #define  I_FULL         3
17 | #define  I_IMPRINTING   4
18 | 
19 | // Constants for special effects
20 | #define SFX_NONE        0
21 | #define SFX_PATERNAL    1
22 | #define SFX_MATERNAL    2
23 | // Parameter bound to an int (forwarded to the base class as &v); presumably holds one of the I_* codes -- confirm in Genetics.cpp
24 | class ImprintingParameter : public Parameter
25 |    {
26 |    public:
27 |    ImprintingParameter(char c, char * desc, int & v)
28 |       : Parameter(c, desc, &v)
29 |       {}
30 | // Virtual hooks, presumably overriding Parameter's; their bodies are not in this header
31 |    virtual void Status();
32 | 
33 |    protected:
34 |       virtual void Translate(char * value);
35 |    };
36 | // Parameter bound to an int (forwarded to the base class as &v); presumably holds one of the GM_* codes -- confirm in Genetics.cpp
37 | class GeneticModelParameter : public Parameter
38 |    {
39 |    public:
40 |    GeneticModelParameter(char c, char * desc, int & v)
41 |       : Parameter(c, desc, &v)
42 |       {}
43 | // Virtual hooks, presumably overriding Parameter's; their bodies are not in this header
44 |    virtual void Status();
45 | 
46 |    protected:
47 |       virtual void Translate(char * value);
48 |    };
49 | 
50 | #endif
51 | 
52 | 


--------------------------------------------------------------------------------
/king/rplot.h:
--------------------------------------------------------------------------------
 1 | #ifndef __rplot_h__
 2 | #define __rplot_h__
 3 | 
 4 | #include "IntArray.h"
 5 | #include "MathVector.h"
 6 | // Plot routines. Each takes a file-name prefix (presumably that of the analysis output -- confirm); what is drawn is defined in the .cpp.
 7 | void plotMIerror(const char *prefix);
 8 | void plotUniqueFamily(const char *prefix, int degree, const char *analysis);
 9 | void plotDuplicate(const char *prefix);
10 | void plotBuild(const char *prefix);
11 | void plotSplitped(const char *prefix);
12 | void plotCluster(const char *prefix);
13 | void plotGenderError(const char *prefix, IntArray & plotx, Vector & ploty, IntArray & plotz, double xHeterozygosity, int gendererrorCount);
14 | void plotRelationship(const char *prefix);
15 | void plotIBDSeg(const char *prefix);
16 | void plotPopStructure(const char *prefix, int projectFlag);
17 | 
18 | // not released yet (SEXCHR is an int argument; its meaning is not visible here)
19 | void plotAUCmapping(const char *prefix, int SEXCHR);
20 | void plotNPL(const char *prefix, int SEXCHR);
21 | void plotHEreg(const char *prefix, int SEXCHR);
22 | void plotIBDmapping(const char *prefix, int SEXCHR);
23 | void plotROHmapping(const char *prefix, const char *stratName, int SEXCHR);
24 | void plotROHforQT(const char *prefix, int SEXCHR);
25 | void plotPopROH(const char *prefix, int SEXCHR);
26 | void plotPopDist(const char *prefix);
27 | void plotAncestry(const char *prefix);
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/king/PedigreeAlleleFreq.h:
--------------------------------------------------------------------------------
 1 | ////////////////////////////////////////////////////////////////////// 
 2 | // libsrc/PedigreeAlleleFreq.h 
 3 | // (c) 2000-2007 Goncalo Abecasis
 4 | // 
 5 | // This file is distributed as part of the MERLIN source code package   
 6 | // and may not be redistributed in any form, without prior written    
 7 | // permission from the author. Permission is granted for you to       
 8 | // modify this file for your own personal use, but modified versions  
 9 | // must retain this copyright notice and must not be distributed.     
10 | // 
11 | // Permission is granted for you to use this file to compile MERLIN.    
12 | // 
13 | // All computer programs have bugs. Use this file at your own risk.   
14 | // 
15 | // Tuesday December 18, 2007
16 | // 
17 |  
18 | #ifndef __ALLELEFREQUENCIES_H__
19 | #define __ALLELEFREQUENCIES_H__
20 | 
21 | #include "Pedigree.h"
22 | // Per-marker allele helpers operating on a Pedigree (bodies are defined elsewhere).
23 | int  CountAlleles(Pedigree & ped, int marker);
24 | void LumpAlleles(Pedigree & ped, int marker, double threshold, bool reorder);
25 | // Estimator codes; presumably the values accepted by EstimateFrequencies' estimator argument -- confirm
26 | #define FREQ_ALL        0
27 | #define FREQ_FOUNDERS   1
28 | #define FREQ_EQUAL      2
29 | 
30 | // Returns true if frequencies estimated, false if previous information okay
31 | bool EstimateFrequencies(Pedigree & ped, int marker, int estimator);
32 | 
33 | #endif
34 | 
35 | 
36 |  
37 | 


--------------------------------------------------------------------------------
/king/MathLu.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATH_LU__
 2 | #define __MATH_LU__
 3 | 
 4 | #include "MathMatrix.h"
 5 | #include "MathVector.h"
 6 | #include "IntArray.h"
 7 | // LU decomposition of a square matrix with partial pivoting, plus solve / invert / determinant helpers.
 8 | class LU
 9 |    {
10 |    public:
11 |       Matrix   lu, inv;       // lu: packed L and U factors (layout described at Decompose); inv: presumably filled by Invert()
12 |       Vector   x;             // solution written by BackSubst()
13 |       IntArray permutation;   // row permutation effected by partial pivoting
14 |       double   d;             // +1 or -1: parity of the row interchanges
15 |    // Initializers are listed in declaration order (lu, inv, x, d); d starts at +1, i.e. no interchanges yet
16 |    LU() : lu("LU.LU"), inv("LU.inv"), x("LU.x"), d(1.0) { }
17 |    ~LU();
18 | 
19 |    // Given a square matrix a, decomposes a permutation of a
20 |    // into an LU product, stored in LU as follows:
21 |    //    Lij = LUij when i > j; 1.0 when i == j; 0.0 otherwise
22 |    //    Uij = LUij when i <= j; 0.0 otherwise
23 |    // permutation[1..n] records the permutation effected by
24 |    // partial pivoting
25 |    // d is output as +1 or -1 depending on whether the number
26 |    // of row interchanges was even or odd
27 |    // (for calculating determinants)
28 |    void Decompose(Matrix & a);
29 | 
30 |    // Solves LU*X = B, taking b as the right hand side vector
31 |    // and storing the solution in x.
32 |    void BackSubst(Vector & b);
33 | 
34 |    // Calculate matrix inverse by backsubstituting basis vectors
35 |    void Invert();
36 | 
37 |    // Calculate determinant
38 |    double Determinant();
39 | 
40 |    // Calculate log of determinant
41 |    double lnDeterminant();
42 | 
43 |    };
44 | 
45 | #endif
46 | 


--------------------------------------------------------------------------------
/king/KinshipX.cpp:
--------------------------------------------------------------------------------
 1 | #include "KinshipX.h"
 2 | 
 3 | void KinshipX::Setup(Family & f)
 4 |    {
 5 |    // Fill the f.count x f.count table of pairwise coefficients for family f
 6 |    allPairs.Dimension(f.count, f.count);
 7 |    // Founders: zero against every other founder; the diagonal is 1 for males and 1/2 otherwise
 8 |    for (int row = 0; row < f.founders; row++)
 9 |       {
10 |       for (int col = 0; col < f.founders; col++)
11 |          allPairs[row][col] = 0.0;
12 |       allPairs[row][row] = (f.ped[f.path[row]].sex == SEX_MALE) ? 1.0 : 0.5;
13 |       }
14 | 
15 |    // Non-founders: each row is derived from the parents' rows, which are already filled
16 |    for (int i = f.founders; i < f.count; i++)
17 |       {
18 |       Person & child = f.ped[f.path[i]];
19 |       const int dad = child.father->traverse;
20 |       const int mom = child.mother->traverse;
21 |       const bool son = child.sex == SEX_MALE;
22 | 
23 |       allPairs[i][i] = son ? 1.0 : (0.5 + allPairs[dad][mom] * 0.5);
24 |       for (int j = 0; j < i; j++)
25 |          {
26 |          double phi = allPairs[i][i];   // value used when j is an MZ twin of this person
27 |          if (!child.isMzTwin(f.ped[f.path[j]]))
28 |             phi = son ? allPairs[mom][j] : (allPairs[dad][j] + allPairs[mom][j]) * 0.5;
29 |          allPairs[i][j] = allPairs[j][i] = phi;
30 |          }
31 |       }
32 |    fam = &f;
33 |    }
34 | 
35 | double KinshipX::operator() (Person & p1, Person & p2)
36 |    {
37 |    // Table lookup keyed by each person's traverse index; no range check is performed
38 |    const int row = p1.traverse, col = p2.traverse;
39 | 
40 |    return allPairs[row][col];
41 |    }
42 | 
43 | 
44 | 
45 | 
46 | 


--------------------------------------------------------------------------------
/king/PeelerNodes.h:
--------------------------------------------------------------------------------
 1 | #ifndef __PEELERNODES_H__
 2 | #define __PEELERNODES_H__
 3 | 
 4 | #include "Pedigree.h"
 5 | // Common base of PersonNode and MatingNode. NOTE(review): the destructor is public and non-virtual, so deleting a derived node through a PeelerNode* would be undefined behaviour -- confirm no caller does that.
 6 | class PeelerNode
 7 |    {
 8 |    public:
 9 |       ~PeelerNode();
10 | 
11 |    protected:
12 |       static Vector scratch;   // static member: a single instance shared by every node
13 |    };
14 | 
15 | class MatingNode;
16 | class PersonNode;
17 | // Node for one Person, carrying a list of states and a probabilities vector (bodies are in PeelerNodes.cpp).
18 | class PersonNode : public PeelerNode
19 |    {
20 |    public:
21 |       Person * person;         // raw pointer; no ownership is expressed in this header
22 | 
23 |       IntArray states;
24 |       Vector   probabilities;  // presumably one entry per element of states -- confirm
25 | 
26 |       void Clear();
27 |       // trans is a callback taking three ints and returning a double (presumably a transmission probability -- confirm)
28 |       void PeelDescendants(MatingNode * mating, double (*trans) (int, int, int));
29 |       void PeelAncestors(MatingNode * mating, double (*trans) (int, int, int));
30 | 
31 |       double Probability()   { return probabilities.Sum(); }   // sum of the probabilities vector
32 |    };
33 | // Node for a father/mother pair, carrying state lists and a probabilities vector (bodies are in PeelerNodes.cpp).
34 | class MatingNode : public PeelerNode
35 |    {
36 |    public:
37 |       IntArray mstates, pstates;  // presumably maternal and paternal states -- confirm
38 |       Vector   probabilities;
39 | 
40 |       PersonNode * father;        // raw pointers; no ownership is expressed in this header
41 |       PersonNode * mother;
42 | 
43 |       void Initialize(PersonNode * father, PersonNode * mother);
44 | 
45 |       void PeelFather();
46 |       void PeelMother();
47 |       void PeelOffspring(PersonNode * child, double (*trans) (int, int, int));   // trans: callback (int, int, int) -> double
48 | 
49 |       double Probability()   { return probabilities.Sum(); }   // sum of the probabilities vector
50 |    };
51 | 
52 | #endif
53 | 
54 | 


--------------------------------------------------------------------------------
/king/OptimizerConstraints.h:
--------------------------------------------------------------------------------
 1 | #ifndef __OPTIMIZER_INTERFACE_H__
 2 | #define __OPTIMIZER_INTERFACE_H__
 3 | 
 4 | #include "MathVector.h"
 5 | #include "IntArray.h"
 6 | // Abstract interface: anything that can be evaluated at a Vector and return a double.
 7 | class ObjectiveFunction
 8 |    {
 9 |    public:
10 |       virtual ~ObjectiveFunction() { };   // virtual, so implementations can be destroyed through this interface
11 | 
12 |       virtual double Evaluate(Vector & v) = 0;   // pure virtual; v is passed by non-const reference
13 |    };
14 | // VectorFunc that wraps an ObjectiveFunction and keeps per-parameter constraints (min, max, range, fixed value).
15 | class OptimizerInterface : public VectorFunc
16 |    {
17 |    public:
18 |       virtual  double Evaluate(Vector & v);
19 | 
20 |       void     Dimension(int parameters);
21 |       int      CountFreeParameters();
22 |       int      CountParameters();
23 | 
24 |       void     ClearConstraints();                 // overload without an index: presumably clears every parameter -- confirm
25 | 
26 |       void     SetMin(int parameter, double min);
27 |       void     SetMax(int parameter, double max);
28 |       void     SetRange(int parameter, double min, double max);
29 |       void     Fix(int parameter, double value);
30 |       void     ClearConstraints(int parameter);    // overload for a single parameter
31 | 
32 |       void     SetObjectiveFunction(ObjectiveFunction & f);
33 |       // Conversions between the two parameterizations; argument order follows the parameter names
34 |       void     Translate(Vector & unconstrained, Vector & constrained);
35 |       void     BackTranslate(Vector & constrained, Vector & unconstrained);
36 | 
37 |    private:
38 |       IntArray constraints;
39 |       Vector   min, max, point;
40 |       // NOTE(review): no constructor is declared, so f is uninitialized until it is assigned (presumably by SetObjectiveFunction) -- confirm
41 |       ObjectiveFunction * f;
42 |    };
43 | 
44 | 
45 | #endif
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/scripts/d13-add-fmis-to-frz9.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | # Usage: d13-add-fmis-to-frz9.pl <milk sites VCF (gzipped)> <sites BCF carrying FMIS>   (bgzip-compressed VCF goes to stdout)
 3 | use strict;
 4 | # Copies the VCF, inserting FMIS10=<value> ahead of MILK_LRE in INFO; the value is FMIS from the BCF line read alongside.
 5 | #my $chr = $ARGV[0];
 6 | # $EXE_PREFIX must name the directory that contains bcftools/ and htslib/.
 7 | my $milkvcf = $ARGV[0]; #"fixed0/milk.filt/milk.chr$chr.merged.sites.vcf.gz";
 8 | my $fmisbcf = $ARGV[1]; #"fixed0/sites.update_info/merged.chr$chr.gtonly.minDP10.update_info.sites.bcf";
 9 | #my $out = "/dev/stdout" #"analysis/filt/ld/frz9/frz9.milk_nold.fmis10.chr$chr.vcf.gz";
10 | 
11 | open(BCF,"$ENV{'EXE_PREFIX'}/bcftools/bcftools view -H $fmisbcf |") || die "Cannot open file\n";
12 | # The two inputs are read in lock-step: one BCF record is consumed for every VCF data line.
13 | open(IN,"zcat $milkvcf |") || die "Cannot open file\n";
14 | open(OUT,"| $ENV{'EXE_PREFIX'}/htslib/bgzip -c") || die "Cannot open file\n";
15 | while(<IN>) {
16 |     print STDERR "Processing $. lines..\n" if ( $. % 1000000 == 0 );
17 |     if ( /^#/ ) {
18 |         print OUT $_;
19 |         if ( /ID=TRIO_CONC_THRES/ ) {   # the new header line is emitted right after this one
20 |             print OUT "##INFO=<ID=FMIS10,Number=1,Type=Float,Description=\"Fraction of missing genotype at depth 10\">\n";
21 |         }
22 |     }
23 |     else {
24 |         my @F = split;
25 |         my @B = split(/[\t\r\n ]+/,<BCF>);
26 |         next unless ( ( $F[1] eq $B[1] ) || ( $F[3] eq $B[3] ) || ( $F[4] eq $B[4] ) );   # NOTE(review): skips only when POS, REF and ALT all differ; '&&' may have been intended -- confirm
27 |         my ($fmis) = $B[7] =~ /;FMIS=(\S+)/;   # list assignment leaves $fmis undef when FMIS is absent ('my $x = ... if ...' is undefined behaviour and could reuse the previous record's value)
28 |         $F[7] =~ s/;MILK_LRE=/;FMIS10=$fmis;MILK_LRE=/ if defined $fmis;   # NOTE(review): (\S+) runs to the end of INFO, so FMIS is assumed to be the last key -- confirm
29 |         print OUT join("\t",@F)."\n";
30 |     }
31 | }
32 | close OUT;
33 | close IN;
34 | close BCF;
35 | 
36 | #print `tabix -f -pvcf $out`;
37 | 


--------------------------------------------------------------------------------
/king/MathConstant.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATHCONSTANT_H__
 2 | #define __MATHCONSTANT_H__
 3 | 
 4 | #ifdef  _MSC_VER
 5 | #define _USE_MATH_DEFINES 
 6 | #endif
 7 | 
 8 | #include "math.h"
 9 | #include "stdlib.h"
10 | 
11 | // Constants for numerical routines
12 | //
13 | // NOTE(review): these are unprefixed macros, so they rewrite any identifier named TINY, EPS, TOL, ... in code that includes this header
14 | #define TINY    1.0e-30        // A small number
15 | #define ITMAX   200            // Maximum number of iterations
16 | #define EPS     3.0e-7         // Relative accuracy
17 | #define ZEPS    3.0e-10        // Precision around zero
18 | #define FPMIN   1.0e-30        // Very small magnitude (far above the smallest representable double)
19 | #define FPMAX   1.0e+100       // Very large magnitude (far below the largest representable double)
20 | #define TOL     1.0e-6         // Zero SVD values below this; also the default tolerance of SVD::Edit and Brent
21 | #define GOLD    0.61803399     // Reciprocal of the golden ratio, 1/phi (phi itself is 1.618...)
22 | #define CGOLD   0.38196601     // Complement of GOLD: 1 - GOLD
23 | // Small numeric helpers, overloaded for double and int. sign(a, b) returns |a| carrying the sign of b (b == 0 counts as positive).
24 | inline double square(double a)         { return a * a; }
25 | inline double sign(double a, double b) { return b >= 0 ? fabs(a) : -fabs(a); }
26 | inline double min(double a, double b)  { return a < b ? a : b; }
27 | inline double max(double a, double b)  { return a > b ? a : b; }
28 | // NOTE(review): min/max live at global scope and can become ambiguous with std::min/std::max under 'using namespace std'
29 | inline int square(int a)      { return a * a; }
30 | inline int sign(int a, int b) { return b >= 0 ? abs(a) : -abs(a); }
31 | inline int min(int a, int b)  { return a < b ? a : b; }
32 | inline int max(int a, int b)  { return a > b ? a : b; }
33 | 
34 | #endif
35 | 


--------------------------------------------------------------------------------
/king/MathMiser.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATH_MISER__
 2 | #define __MATH_MISER__
 3 | 
 4 | #include "Random.h"
 5 | #include "MathMatrix.h"
 6 | #include "MathVector.h"
 7 | #include "MathSobol.h"
 8 | 
 9 | // Monte Carlo samples a user-supplied function in a rectangular volume
10 | // specified by region[2][dim]. A total of ncall calls are made to the function.
11 | // The integral of the function is returned in tgral and the standard
12 | // deviation of this estimate is in stdev.
13 | //
14 | // Element type of MathMiser's private stack array. NOTE(review): the field meanings below are inferred from names only.
15 | struct MiserStack
16 |    {
17 |    int    points;   // presumably the number of sample points for this entry
18 |    double weight;   // presumably the weight applied to this entry
19 |    Matrix region;   // presumably region bounds, laid out like Integrate()'s argument
20 |    };
21 | // Monte Carlo integrator: the integrand is supplied through vfunc, results are reported in tgral / stdev.
22 | class MathMiser
23 |    {
24 |    public:
25 |       SobolSequence sobol;
26 | 
27 |       long   ncall;         // number of function calls to make (defaults to 1000)
28 | 
29 |       double tgral;         // integral estimate
30 |       double stdev;         // standard deviation of the estimate
31 | 
32 |       VectorFunc * vfunc;   // integrand; null until the caller assigns it (this header declares no destructor, so it is not freed here)
33 | 
34 |       MathMiser() : sobol(), ncall(1000), tgral(0.0), stdev(0.0), vfunc(0)
35 |          {
36 |          // every scalar member now starts with a defined value; func() dereferences vfunc, so assign it before use
37 |          }
38 | 
39 |       double Integrate(Matrix & region);
40 | 
41 |    protected:
42 |       // local variables for integration
43 |       // are here... to save on new / delete
44 |       // calls
45 | 
46 |       void RandomPoint(Matrix & region, Vector & point);
47 |       // Forwards to the user-supplied integrand; vfunc must be non-null here
48 |       double func (Vector & v)
49 |          { return vfunc->Evaluate(v); }
50 | 
51 |    private:
52 |       MiserStack stack[32];      // should be good for at least 2^31 points
53 | 
54 |       Vector midpoint, point, minl, minr, maxl, maxr;
55 | 
56 |    };
57 | 
58 | #endif
59 | 
60 | 


--------------------------------------------------------------------------------
/king/Constant.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CONSTANT_H_
 2 | #define _CONSTANT_H_
 3 | // Cast to the comparator pointer type int (*)(const void *, const void *), the shape qsort/bsearch expect
 4 | #define  COMPAREFUNC (int (*)(const void *, const void *))
 5 | // Fixed sizes; presumably buffer / name lengths in characters -- confirm at the use sites
 6 | #define  BUFSIZE     1024
 7 | #define  FILENAMELEN 100
 8 | #define  IDLEN       20
 9 | // Character sets: SEPARATORS is WHITESPACE plus '/'
10 | #define  SEPARATORS  " \t\n\r\f/"
11 | #define  WHITESPACE  " \t\n\r\f"
12 | 
13 | #define  SWTABLESKIP 9
14 | #define  SWTABLEMAX  10000
15 | // Sentinel double (6.66666e-66), not an IEEE NaN: it compares equal to itself, so it must be tested with == rather than isnan()
16 | #define  _NAN_       ((double) (6.66666e-66))
17 | // File-name string literals (presumably default input/output names -- confirm)
18 | #define  QTDTDATA    "qtdt.dat"
19 | #define  QTDTPED     "qtdt.ped"
20 | #define  QTDTIBD     "qtdt.ibd"
21 | #define  QTDTRAW     "regress.tbl"
22 | #define  GENIHDATAIN "genih.dat"
23 | // When __WIN32__ is not defined, stricmp is mapped onto strcasecmp
24 | #ifndef  __WIN32__
25 | #define  stricmp     strcasecmp
26 | #endif
27 | 
28 | // Constants for older haplotype handling programs
29 | // Constants for HAPLOXT
30 | #define XT_MAX_ALLELES  50          // Maximum alleles for crosstabulation
31 | #define XT_VECTORSIZE   10000       // Total haplotypes in population
32 | #define XT_POOLTRESH    7           // Threshold for pooling rare alleles
33 | // Simwalk Haplotype Vectors
34 | #define HV_MAXSIZE      100         // Haplotypes in single SimWalk pedigree
35 | #define HV_INFOTRESH    75          // Percentage of loci typed
36 | #define HV_STATELENGTH  100         // Markers per haplotype
37 | #define HV_SKIPLINES    4           // lines to skip at bottom of family tree
38 | // Simwalk Summary Files
39 | #define HT_TABLE_SIZE   1000
40 | #define HT_SKIP_LINES   9
41 | 
42 | #endif
43 | 
44 | 


--------------------------------------------------------------------------------
/king/GenotypeLists.h:
--------------------------------------------------------------------------------
 1 | #ifndef __GENOTYPE_ELIMINATION__
 2 | #define __GENOTYPE_ELIMINATION__
 3 | 
 4 | #include "Pedigree.h"
 5 | // List of candidate genotypes (allele1/allele2 arrays) with static routines that prune the lists across a family for one marker; bodies are in the .cpp.
 6 | class GenotypeList
 7 |    {
 8 |    public:
 9 | 
10 |       IntArray allele1, allele2;   // presumably the two alleles of each listed genotype, index-aligned -- confirm
11 |       IntArray alleles;
12 | 
13 |       bool ignore;
14 |       int  checked;
15 | 
16 |       GenotypeList();
17 |       // Static entry point; the private static helpers below all receive the same GenotypeList pointer
18 |       static bool EliminateGenotypes(Pedigree & ped, Family * family, int marker);
19 | 
20 |       void   Dimension(int genotypes);
21 |       void   Delete(int genotype);
22 | 
23 |       bool   Matches(int genotype, int allele);
24 |       bool   Matches(int allele);
25 | 
26 |       int    SaveGenotype(int genotype);
27 |       void   SetGenotype(int genotype, int al1, int al2);
28 | 
29 |    private:
30 |       static void InitializeList(GenotypeList * list, Pedigree & p, Family * f, int marker);
31 |       static bool PairwiseCheck(GenotypeList * list, Pedigree & p, Family * f);
32 |       static bool FamilyCheck(GenotypeList * list, Pedigree & p, Family * f);
33 |       // NOTE(review): i, j, k are presumably genotype indices for father, mother and child -- confirm in GenotypeLists.cpp
34 |       static bool CheckTrio(GenotypeList * list, int fatid, int motid, int child, int i, int j, int k);
35 |       static bool TrimParent(GenotypeList * list, Person & person, int fatid, int motid);
36 |       static bool Cleanup(GenotypeList * list, Person & person, int fatid, int motid, int child, int geno);
37 | 
38 |       static void Print(GenotypeList * List, Pedigree & p, Family * f, int marker);
39 |    };
40 | 
41 | 
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/scripts/run-discovery-local.cmd:
--------------------------------------------------------------------------------
 1 | # out : log/discover
 2 | # list : SMIDX : index/list.107.local.crams.index
 3 | # var : ROOT : ..
 4 | # target : out/sm/$SMIDX$1$/$SMIDX$1$.vb2 out/sm/$SMIDX$1$/$SMIDX$1$.norm.xy out/sm/$SMIDX$1$/$SMIDX$1$.bcf out/sm/$SMIDX$1$/$SMIDX$1$.bcf.csi
 5 | # name: example-discovery
 6 | mkdir -p out/sm/$SMIDX$1$/
 7 | bash -c 'set -o pipefail; REF_PATH=resources/ref/md5/%2s/%2s/%s $ROOT$/samtools/samtools view -uh -T resources/ref/hs38DH.fa $SMIDX$2$ 2> out/sm/$SMIDX$1$/$SMIDX$1$.bcf.samtools_err | $ROOT$/bamUtil/bin/bam clipoverlap --poolSize 100000000 --in -.ubam --out -.ubam 2> out/sm/$SMIDX$1$/$SMIDX$1$.bcf.bamUtil_err | $ROOT$/vt-topmed/vt discover2 -z -q 20 -b + -r resources/ref/hs38DH.fa -s $SMIDX$1$ -o out/sm/$SMIDX$1$/$SMIDX$1$.bcf 2> out/sm/$SMIDX$1$/$SMIDX$1$.bcf.vt_err'
 8 | $ROOT$/bcftools/bcftools index -f out/sm/$SMIDX$1$/$SMIDX$1$.bcf
 9 | REF_PATH=resources/ref/md5/%2s/%2s/%s $ROOT$/cramore/cramore cram-verify-bam --svd resources/ref/HGDP_938.b38.genotypes.svd --sam $SMIDX$2$ --cap-DP 100 --out out/sm/$SMIDX$1$/$SMIDX$1$.vb2 --num-PC 4 > out/sm/$SMIDX$1$/$SMIDX$1$.vb2.stdout 2> out/sm/$SMIDX$1$/$SMIDX$1$.vb2.stderr
10 | $ROOT$/cramore/cramore vcf-normalize-depth --xy --vcf out/sm/$SMIDX$1$/$SMIDX$1$.bcf --known resources/ref/1000G_omni2.5.b38.sites.PASS.vcf.gz --gc resources/ref/hs38DH.gc.w150.s5.gz --xLabel chrX --yLabel chrY --xStart 2781479 --xStop 155701383 --out out/sm/$SMIDX$1$/$SMIDX$1$.norm > out/sm/$SMIDX$1$/$SMIDX$1$.norm.out 2> out/sm/$SMIDX$1$/$SMIDX$1$.norm.err
11 | 


--------------------------------------------------------------------------------
/king/MathSVD.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATHSVD_H__
 2 | #define __MATHSVD_H__
 3 | 
 4 | #include "MathMatrix.h"
 5 | #include "MathVector.h"
 6 | #include "MathConstant.h"
 7 | 
 8 | // SVD Decomposition
 9 | //
10 | 
11 | class SVD
12 |    {
13 |    // Given a matrix a[1..m][1..n] computes its singular value
14 |    // decomposition, A = U*W*V^T.
15 |    public:
16 |       int         m, n;    // m - no. of rows, n - no. of parameters
17 |       //Matrix      u;       // The matrix U
18 |       double **u;          // The matrix U, held as a raw double** (replaces the Matrix member commented out above)
19 |       Vector      w;       // The diagonal matrix of singular values, stored as the vector w[1..n]
20 |       double **v;          // The matrix V (not the transpose V^T), held as a raw double**
21 |       //Matrix      v;       // The matrix V (not the transpose V^T)
22 |                            // is output as v[1..n][1..n]
23 | // NOTE(review): u and v are raw pointers and a destructor is declared but no copy constructor/assignment; copying an SVD would presumably share them -- confirm in MathSVD.cpp
24 |       Vector      x;       // The solution vector after backsubstitution
25 | 
26 |       Matrix      cov;     // The covariance matrix for the parameters
27 |                            // obtained by the fit
28 | 
29 |    SVD();
30 |    ~SVD();
31 |    // mp / np default to -1; presumably "use the dimensions of a" -- confirm
32 |    void Decompose(Matrix & a, int mp = -1, int np = -1);
33 |    void Edit(double tol = TOL);   // tol defaults to the TOL macro ("Zero SVD values below this")
34 |    void BackSubst(Vector & b);
35 |    void Covariances();
36 | 
37 |    double RSS(Matrix & M, Vector & b);         // Residual Sum of Squares
38 |    void   Residuals(Matrix & M, Vector & b, Vector & delta); // Residuals
39 | 
40 |    protected:
41 |       void Empty();
42 | 
43 |    private:
44 |       static double pythag(double a, double b);
45 |    };
46 | 
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/king/MathGold.h:
--------------------------------------------------------------------------------
 1 | #ifndef _MATHGOLD_H_
 2 | #define _MATHGOLD_H_
 3 | 
 4 | #include "MathConstant.h"
 5 | #include "MathVector.h"
 6 | 
 7 | // Minimizes functions of one variable in one dimension
 8 | class ScalarMinimizer
 9 |    {
10 |    public:
11 |       double (*func)(double);         // function to be minimized
12 |       double a, b, c, min;            // presumably the bracketing points and the located minimum -- confirm in the .cpp
13 |       double fa, fb, fc, fmin;        // presumably the function values at a, b, c and min
14 |    // The constructor clears func only; the eight doubles above are left uninitialized
15 |    ScalarMinimizer() { func = NULL; };
16 |    virtual ~ScalarMinimizer() { }
17 |    // Virtual evaluation hook; LineMinimizer declares its own f
18 |    virtual double f(double x);
19 | 
20 |    void   Bracket(double a, double b);      // bracket a minimum near a and b
21 |    virtual double Brent(double tol = TOL);  // return minimum, to precision TOL
22 |                                             // result stored in min
23 |    };
24 | // NOTE(review): LineMinimizer::func (a VectorFunc *) hides ScalarMinimizer::func (a double (*)(double)) of the same name.
25 | class LineMinimizer : public ScalarMinimizer
26 | // Minimizes f(P) along the line defined by P = point + x * line
27 | // Stores the best point (in point) along the line
28 | // and the displacement from the original (in line)
29 |    {
30 |    private:
31 |       bool         garbage;   // when true, the destructor deletes func
32 |    public:
33 |       VectorFunc * func;      // function to be minimized
34 |       Vector       line, point, temp;
35 |    // Three ways to construct: no function yet, an existing VectorFunc, or a plain function pointer
36 |    LineMinimizer();
37 |    LineMinimizer(VectorFunc & vfunc);
38 |    LineMinimizer(double (*vfunc)(Vector & v));
39 | 
40 |    virtual ~LineMinimizer()
41 |       { if (garbage) delete func; }
42 | 
43 |    virtual double f(double x);
44 | 
45 |    virtual double Brent(double tol = TOL);
46 |    };
47 | 
48 | #endif
49 | 
50 | 


--------------------------------------------------------------------------------
/king/MathCholesky.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATH_CHOLESKY__
 2 | #define __MATH_CHOLESKY__
 3 | 
 4 | #include "MathMatrix.h"
 5 | #include "MathVector.h"
 6 | // Cholesky factorization A = L * transpose(L) of a symmetric positive definite matrix, with solve / invert / determinant helpers.
 7 | class Cholesky
 8 |    {
 9 |    public:
10 |       Matrix      L, inv;   // L: the lower triangular factor; inv: presumably filled by Invert()
11 |       Vector      x;        // presumably the solution written by BackSubst() -- confirm
12 | 
13 |    Cholesky() : L("cholesky.L"), inv("cholesky.inverse"), x("cholesky.x")
14 |       { }
15 | 
16 |    ~Cholesky()
17 |       { }
18 | 
19 |    // Given a symmetric positive definite matrix A finds
20 |    // a lower triangular matrix L such that L * transpose(L) = A
21 |    // Only the upper triangle of A need be given
22 |    void Decompose(Matrix & A);
23 | 
24 |    // If you call fast decompose the upper triangle of L is
25 |    // undefined (as opposed to zero). This is often okay and
26 |    // allows for a little more speed...
27 |    void FastDecompose(Matrix & A);
28 | 
29 |    // Tries to decompose matrix A, returning true on success
30 |    // or false on failure ... you should also check that
31 |    // determinant is not zero before using results if this
32 |    // is a concern
33 |    bool TryDecompose(Matrix & A);
34 | 
35 |    // solve Y = X b
36 |    void BackSubst(Vector & b);
37 |    void BackSubst0(Vector & b);   // NOTE(review): how this differs from BackSubst is not visible in this header
38 |    void Invert();
39 | 
40 |    // determinant functions
41 |    double lnDeterminantL();
42 |    double DeterminantL();
43 |    // A = L * transpose(L), so det(A) = det(L)^2 and ln det(A) = 2 * ln det(L)
44 |    double lnDeterminant()
45 |       {
46 |       return 2 * lnDeterminantL();
47 |       }
48 |    double Determinant()
49 |       {
50 |       double temp = DeterminantL();
51 |       return temp * temp;
52 |       }
53 |    };
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:16.04
 2 | # NOTE(review): ubuntu:16.04 is past its standard support window; consider a newer base once the toolchain is verified on it
 3 | COPY . /topmed_variant_calling
 4 | # Build prerequisites; the apt package index is deleted in the same layer so it does not stay in the image
 5 | RUN apt-get update && apt-get install -y \
 6 |   apt-utils \
 7 |   automake \
 8 |   autoconf \
 9 |   build-essential \
10 |   git \
11 |   ghostscript \
12 |   gnuplot \
13 |   groff \
14 |   libcurl4-openssl-dev \
15 |   liblzma-dev \
16 |   libncurses5-dev \
17 |   libssl-dev \
18 |   libzstd-dev \
19 |   python3 \
20 |   r-base \
21 |   unzip \
22 |   wget \
23 |   zlib1g-dev \
24 |   && rm -rf /var/lib/apt/lists/*
25 | RUN mkdir /tmp/plink && cd /tmp/plink && wget http://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20190617.zip && unzip plink_linux_x86_64_20190617.zip && cp plink /usr/local/bin/plink-1.9 && cd / && rm -r /tmp/plink
26 | 
27 | WORKDIR /topmed_variant_calling
28 | # The plink scratch directory is removed inside the RUN step that creates it, so it never persists in an image layer
29 | 
30 | RUN git submodule init && git submodule update 
31 | 
32 | RUN cd libsvm/ && git clean -fdx && make && cd ..
33 | RUN cd apigenome && git clean -fdx && autoreconf -vfi && ./configure --prefix $PWD && make && make install && cd ..
34 | RUN cd libStatGen && git clean -fdx && make && cd ..
35 | RUN cd bamUtil && git clean -fdx && make && cd ..
36 | RUN cd invNorm && git clean -fdx && make && cd ..
37 | RUN cd htslib && git clean -fdx && autoheader && autoconf && ./configure && make && cd ..
38 | RUN cd vt-topmed && git clean -fdx && make && cd ..
39 | RUN cd cramore && git clean -fdx && autoreconf -vfi && ./configure && make && cd ..
40 | RUN cd samtools && git clean -fdx && autoheader && autoconf -Wno-syntax && ./configure && make && cd ..
41 | RUN cd bcftools && git clean -fdx && make && cd ..
42 | RUN cd king && rm -f king *.o && g++ -O3 -c *.cpp && g++ -O3 -o king *.o -lz && cd ..
43 | 
44 | 


--------------------------------------------------------------------------------
/libsvm/COPYRIGHT:
--------------------------------------------------------------------------------
 1 | 
 2 | Copyright (c) 2000-2011 Chih-Chung Chang and Chih-Jen Lin
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions
 7 | are met:
 8 | 
 9 | 1. Redistributions of source code must retain the above copyright
10 | notice, this list of conditions and the following disclaimer.
11 | 
12 | 2. Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in the
14 | documentation and/or other materials provided with the distribution.
15 | 
16 | 3. Neither name of copyright holders nor the names of its contributors
17 | may be used to endorse or promote products derived from this software
18 | without specific prior written permission.
19 | 
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 | A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | 


--------------------------------------------------------------------------------
/king/TDT.h:
--------------------------------------------------------------------------------
 1 | #ifndef __TDT_h__
 2 | #define __TDT_h__
 3 | 
 4 | class TDT:public AssociationAnalysis{
 5 |    public:
 6 |       TDT(Pedigree & pedigree);
 7 |       ~TDT();
 8 |       IntArray MT, MNT, pooT[2], pooNT[2];
 9 |       void SetupGlobals();
10 |       void pre_genome();
11 |       void post_genome();
12 |       void PrintScores();
13 | 
14 |       void Analyze();
15 |       void AnalyzeX();
16 | 
17 |       void TDT_Preparation();
18 |       void TDT_Analysis();
19 |       void GEE_Analysis();
20 |       void WGDT_Analysis();
21 |       void TDT1P_Analysis();
22 |       void GDT_PO_Analysis();
23 |       void GDT_CP_Analysis();
24 |       void GDT_Missing_Analysis();
25 |       void PDT_Analysis();
26 |       void EPDT_Analysis();
27 |       void RDT_Analysis();
28 |       void GDT_Analysis();
29 |       void GDT_hetero_Analysis();
30 |       void FCAT_Analysis();
31 | 
32 |       void GDT_AnalysisX();
33 |       void GDT_PO_AnalysisX();
34 | 
35 |       int CheckTwin();
36 |       IntArray TwinFlag_Fam;
37 |       IntArray TwinFlag_ID;
38 |       void WrongQLS_Analysis();
39 |       void MQLS_Analysis();
40 | 
41 |       char relationship(int i, int j);
42 | //      String relationSet;
43 | 
44 |       // unreleased
45 |       void QLS_Analysis();
46 |       void bQLS_Analysis();
47 |       void tQLS_Analysis();
48 |       void cQLS_Analysis();
49 |       void QLS_hetero_Analysis();
50 |       void LogisticScore_Analysis();
51 |       void GDT_FO_Analysis();
52 |       void GDT_MO_Analysis();
53 | 
54 |       void rareGDT_Analysis();
55 |       void rareEDA_Analysis();
56 | //      void AutosomalCheck();
57 | };
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/king/GenotypeCompressor.h:
--------------------------------------------------------------------------------
 1 | #ifndef __GENOTYPE_COMPRESSOR_H__
 2 | #define __GENOTYPE_COMPRESSOR_H__
 3 | 
 4 | #ifndef  uchar
 5 | #define uchar        unsigned char
 6 | #endif
 7 | 
 8 | class GenotypeCompressor
 9 |    {
10 |    public:
11 |       static uchar * CompressGenotypes(uchar * genotypes, int n);
12 |       static void    RetrieveGenotypes(uchar * compressed, uchar * genotypes, int n);
13 |       static char *  Describe(uchar * compressed);
14 | 
15 |       static int     MemoryAllocated();
16 |       static int     MemoryInUse();
17 | 
18 |    private:
19 |       static uchar * memoryBlocks[1024];
20 |       static int     blockIndex;
21 |       static int     blockByte;
22 | 
23 |       static void    AllocateBlock();
24 |       static void    AllocateMemory(int size);
25 | 
26 |       static uchar   OddOneOut(uchar a, uchar b, uchar c);
27 |       static uchar   EncodeTriplet(uchar a, uchar b, uchar c);
28 |       static void    DecodeTriplet(uchar triplet, uchar & a, uchar & b, uchar & c);
29 | 
30 |       static void    WRITEBIT(uchar * block, uchar & byte, uchar & mask, int bit)
31 |          {
32 |          if (bit) byte |= mask;
33 |          mask *= 2;
34 |          if (mask == 0)
35 |             {
36 |             block[blockByte++] = byte;
37 |             mask = 1;
38 |             byte = 0;
39 |             }
40 |          }
41 | 
42 |       static bool READBIT(uchar * & input, uchar & mask)
43 |          {
44 |          mask *= 2;
45 | 
46 |          if (mask == 0) mask = 1, input++;
47 | 
48 |          return *input & mask;
49 |          }
50 |    };
51 | 
52 | #endif
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------
/king/InputFile.cpp:
--------------------------------------------------------------------------------
 1 | ////////////////////////////////////////////////////////////////////// 
 2 | // libsrc/InputFile.cpp 
 3 | // (c) 2000-2007 Goncalo Abecasis
 4 | // 
 5 | // This file is distributed as part of the MERLIN source code package   
 6 | // and may not be redistributed in any form, without prior written    
 7 | // permission from the author. Permission is granted for you to       
 8 | // modify this file for your own personal use, but modified versions  
 9 | // must retain this copyright notice and must not be distributed.     
10 | // 
11 | // Permission is granted for you to use this file to compile MERLIN.    
12 | // 
13 | // All computer programs have bugs. Use this file at your own risk.   
14 | // 
15 | // Tuesday December 18, 2007
16 | // 
17 |  
18 | #include "InputFile.h"
19 | 
20 | #ifdef __ZLIB_AVAILABLE__
21 | 
22 | IFILE::IFILE(const char * filename, const char * mode)
23 |    {
24 |    // Some implementations of zlib will not open files that are
25 |    // larger than 2Gb. To ensure support for large (uncompressed)
26 |    // files, we fall-back on the regular fopen when the initial
27 |    // gzopen call fails and the filename does not end in .gz
28 | 
29 |    gzMode = true;
30 |    gzHandle = gzopen(filename, mode);
31 | 
32 |    if (gzHandle == NULL)
33 |       {
34 |       int lastchar = 0;
35 | 
36 |       while (filename[lastchar] != 0) lastchar++;
37 | 
38 |       if (lastchar >= 3 && filename[lastchar - 3] == '.' &&
39 |                            filename[lastchar - 2] == 'g' &&
40 |                            filename[lastchar - 1] == 'z')
41 |          return;
42 | 
43 |       gzMode = false;
44 |       handle = fopen(filename, mode);
45 |       }
46 |    };
47 | 
48 | #endif
49 |  
50 | 


--------------------------------------------------------------------------------
/king/Matings.cpp:
--------------------------------------------------------------------------------
 1 | #include "Matings.h"
 2 | 
 3 | #define MATING_HASH_PADDING      2     /* Number of empty slots per mating,
 4 |                                           which are used to speed up searching
 5 |                                           at the cost of increased memory use */
 6 | 
 7 | void Matings::ListMatings(Family * family)
 8 |    {
 9 |    founders = family->founders;
10 |    matingCount = 0;
11 | 
12 |    InitializeHash(family->count);
13 | 
14 |    for (int i = family->founders; i < family->nonFounders; i++)
15 |       {
16 |       Person & p = family->ped[family->path[i]];
17 | 
18 |       matingMap[i - family->founders] = LookupMating(p.father->serial, p.mother->serial);
19 |       }
20 |    }
21 | 
22 | void Matings::InitializeHash(int size)
23 |    {
24 |    size *= MATING_HASH_PADDING;
25 | 
26 |    hash.Dimension(size);
27 |    hash.Set(-1);
28 | 
29 |    hashId.Dimension(size);
30 |    hashId.Set(-1);
31 |    }
32 | 
33 | int Matings::LookupMating(int father, int mother)
34 |    {
35 |    int id = father * hash.Length() + mother;
36 |    int h  = father * MATING_HASH_PADDING;
37 | 
38 |    while (true)
39 |       {
40 |       if (hash[h] == -1)
41 |          {
42 |          hashId[h] = id;
43 |          return hash[h] = matingCount++;
44 |          }
45 | 
46 |       if (hashId[h] == id)
47 |          return hash[h];
48 | 
49 |       h++;
50 | 
51 |       if (h == hash.Length()) h = 0;
52 |       }
53 |    }
54 | 
55 | int Matings::LookupMating(Person & p)
56 |    {
57 |    return LookupMating(p.serial - founders);
58 |    }
59 | 
60 | int Matings::LookupMating(int serial)
61 |    {
62 |    return matingMap[serial - founders];
63 |    }
64 | 


--------------------------------------------------------------------------------
/king/MathVegas.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATHVEGAS_H__
 2 | #define __MATHVEGAS_H__
 3 | 
 4 | #include "MathVector.h"
 5 | #include "MathMatrix.h"
 6 | #include "Random.h"
 7 | 
 8 | #define ALPH   1.5
 9 | #define NDMX   50       // Maximum number of increments for each axis
10 | #define MXDIM  10       // Maximum number of dimensions
11 | 
12 | // Monte-carlo integration of user supplied ndimensional
13 | // function, in a rectangular volume specified by matrix
14 | // Volume[2][ndim], consisting of lower and upper bounds
15 | // itmx iterations each with about ncall function calls
16 | // The sampling grid is refined iteratively. Produces
17 | // the integral tgral, with standard deviation sd, and
18 | // an indicator of integrity chi2a (should be less than 1).
19 | class Vegas
20 |    {
21 |    public:
22 |       int    itmx, ncall;
23 |       double tgral, sd, chi2a;
24 |       static Random rand;
25 |       VectorFunc * vfunc;
26 | 
27 |    Vegas();
28 |    ~Vegas();
29 | 
30 |    double func(Vector & point)
31 |       { return vfunc->Evaluate(point); }
32 | 
33 |    // Three levels of initialization possible
34 |    // 0 - Total reset
35 |    // 1 - Keep Grid, clear Estimates
36 |    // 2 - Keep Grid and Estimates
37 |    // 3 - Do additional iterations, no changes
38 |    void Init(Matrix & Volume, int level = 0);
39 | 
40 |    // Integrate the function
41 |    double Integrate(Matrix & Volume);
42 | 
43 |    private:
44 |       void   Rebin(double rc, Vector & xi);
45 | 
46 |       int    mds, nd, ndo, ng, npg, * ia, * kg;
47 |       double calls, dv2g, dxg, rc;
48 |       double wgt, xjac, xn, xnd, xo, schi, si, swgt;
49 |       Vector dt, dx, r, x, xin;
50 |       Matrix d, di, xi;
51 |    };
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/king/MathAssoc.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MATH_ASSOC_H__
 2 | #define __MATH_ASSOC_H__
 3 | 
 4 | #include "MathVector.h"
 5 | 
 6 | // Measures of association based on chi-sq
 7 | //
 8 | 
 9 | class AssocChi
10 |    {
11 |    public:
12 |       double sum;          // values scored N
13 |       double chisq;        // chi-square value
14 |       double df;           // degrees freedom df
15 |       double prob;         // significance level p
16 |       double lop;          // -log10 of significance
17 |       double cramrv;       // between 0 and 1 - Cramer's V
18 |       double ccc;          // measure of association - depends on I and J
19 | 
20 |       int isValid;
21 | 
22 |    AssocChi();
23 | 
24 |    void Calc(int ** nn, int ni, int nj);
25 |    };
26 | 
// AssocEntropy -- measures of association based on entropy, computed by
// Calc from a table of counts nn with ni rows and nj columns.
class AssocEntropy
   {
   public:
      double sum;       // values scored N
      double h;         // entropy of whole table
      double hx;        // entropy of the x distribution
      double hy;        // entropy of the y distribution
      double hygx;      // entropy of y given x
      double hxgy;      // entropy of x given y
      double uygx;      // dependency of y on x (named like hygx) -- TODO confirm
      double uxgy;      // dependency of x on y (named like hxgy) -- TODO confirm
      double uxy;       // symmetrical dependency of x and y

      int    isValid;

      AssocEntropy();

      void Calc(int ** nn, int ni, int nj);
   };
49 | 
50 | // Spearman's Rank Correlation
51 | void Spearman(Vector & v1, Vector & v2,
52 |               double & rankD, double & zD, double & probD,
53 |               double & spearmanR, double & probR);
54 | 
55 | #endif
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/king/LongArray.h:
--------------------------------------------------------------------------------
 1 | #ifndef __LONGINTARRAY_H__
 2 | #define __LONGINTARRAY_H__
 3 | 
 4 | #include "LongInt.h"
 5 | 
 6 | class LongArray
 7 |    {
 8 |    private:
 9 |       longint * items;
10 |       int size, count;
11 | 
12 |       void Grow(int new_size);
13 |       static int Compare(int * a, int * b);
14 | 
15 |    public:
16 |       static int alloc;
17 | 
18 |       LongArray(int start_size = 0);
19 |       LongArray(LongArray & source);
20 |       ~LongArray();
21 | 
22 |       LongArray & operator = (const LongArray & rhs);
23 | 
24 |       longint & operator [] (int index) { return items[index]; }
25 | 
26 |       int  Append(longint value);
27 |       void Push(longint value)      { Append(value); }
28 |       longint Pop()                 { return items[--count]; }
29 |       longint Peek() const          { return items[count - 1]; }
30 |       longint &Last() const         { return items[count - 1]; }
31 | 
32 |       int  Delete(int index);
33 |       void InsertAt(int index, longint value);
34 | 
35 |       int  Find(longint value) const;
36 |       void Sort();
37 | 
38 |       void Zero();
39 |       void Set(longint value);
40 | 
41 |       int  Length()                 { return count; }
42 |       void Dimension(int new_count) { Grow(new_count); count = new_count; }
43 |       void Clear()                  { count = 0; }
44 | 
45 |       void Swap(int i, int j)
46 |            { longint tmp = items[i]; items[i] = items[j]; items[j] = tmp; }
47 | 
48 |       void Reverse();
49 | 
50 |       operator longint * () { return items; }
51 | 
52 |       bool operator == (const LongArray & rhs) const;
53 |       bool operator != (const LongArray & rhs) const;
54 | 
55 |       int Hash(int initval);
56 |    };
57 | 
58 | #endif /* __LONGINTARRAY_H */
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/king/MathDeriv.cpp:
--------------------------------------------------------------------------------
 1 | #include "MathDeriv.h"
 2 | #include "MathConstant.h"
 3 | 
 4 | #include <math.h>
 5 | 
 6 | #define   MAXROUNDS      20
 7 | #define   SQRT_HALF      (1.0/M_SQRT2)
 8 | #define   TWO            (M_SQRT2 * M_SQRT2)
 9 | 
10 | double dfunction(double (* func)(double), double x, double h, double & err)
11 |    {
12 |    double a[MAXROUNDS][MAXROUNDS];
13 | 
14 |    // Initial crude estimate
15 |    double result = a[0][0] = ((*func)(x+h) - (*func)(x-h)) / (2.0 * h);
16 | 
17 |    // Initial guess of error is large
18 |    err = 1e30;
19 | 
20 |    // At each round, update Neville tableau with smaller stepsize and higher
21 |    // order extrapolation ...
22 |    for (int i = 1; i < MAXROUNDS; i++)
23 |       {
24 |       // Decrease h
25 |       h *= SQRT_HALF;
26 | 
27 |       // Re-evaluate function
28 |       a[0][i] = ((*func)(x+h) - (*func)(x-h)) / (2.0 * h);
29 | 
30 |       // Calculate extrapolations of various orders ...
31 |       double factor = TWO, error;
32 | 
33 |       for (int j = 1; j <= i; j++)
34 |          {
35 |          a[j][i] = (a[j-1][i] * factor - a[j-1][i-1])/(factor - 1.0);
36 | 
37 |          factor *= TWO;
38 | 
39 |          error = max(fabs(a[j][i] - a[j-1][i]), fabs(a[j][i] - a[j-1][i-1]));
40 | 
41 |          // Did we improve solution?
42 |          if (error < err)
43 |             {
44 |             err = error;
45 |             result = a[j][i];
46 |             }
47 |          }
48 | 
49 |       // Stop if solution is deteriorating ...
50 |       if (fabs(a[i][i] - a[i-1][i-1]) >= 2.0 * err)
51 |          break;
52 |       }
53 | 
54 |    return result;
55 |    }
56 | 
57 | double dfunction(double (* func)(double), double x, double h)
58 |    {
59 |    double err;
60 | 
61 |    return dfunction(func, x, h, err);
62 |    }
63 | 


--------------------------------------------------------------------------------
/king/BasicHash.h:
--------------------------------------------------------------------------------
 1 | #ifndef __BASICHASH_H__
 2 | #define __BASICHASH_H__
 3 | 
 4 | #include <stdlib.h>
 5 | 
 6 | class BasicHash
 7 |    {
 8 |    protected:
 9 |       void          ** objects;
10 |       unsigned int      * keys;
11 |       unsigned int count, size;
12 |       unsigned int        mask;
13 | 
14 |    public:
15 |       BasicHash(int startsize = 32);
16 |       virtual ~BasicHash();
17 | 
18 |       void Grow()    { SetSize(size * 2); }
19 |       void Shrink()  { SetSize(size / 2); }
20 | 
21 |       void SetSize(int newsize);
22 | 
23 |       void Clear();
24 | 
25 |       int  Capacity() const { return size; }
26 |       int  Entries() const  { return count; }
27 | 
28 |       void * Object(int i) const { return objects[i]; }
29 | 
30 |       void SetObject(int i, void * object)
31 |          { objects[i] = object; }
32 | 
33 |       int Add    (int key, void * object = NULL);
34 |       int Find   (int key);
35 |       int Rehash (int key, int h);
36 | 
37 |       BasicHash & operator = (const BasicHash & rhs);
38 | 
39 |       void * operator [] (int i) const { return objects[i]; }
40 | 
41 |       void Delete(unsigned int index);
42 | 
43 |       bool SlotInUse(int index) { return objects[index] != NULL; }
44 | 
45 |    private:
46 |       unsigned int Iterate(unsigned int key) const
47 |          {
48 |          unsigned int h = key & mask;
49 | 
50 |          while (objects[h] != NULL && keys[h] != key)
51 |             h = (h + 1) & mask;
52 | 
53 |          return h;
54 |          }
55 | 
56 |       unsigned int ReIterate(unsigned int key, unsigned int h) const
57 |          {
58 |          h = (h + 1) & mask;
59 | 
60 |          while (objects[h] != NULL && keys[h] != key)
61 |             h = (h + 1) & mask;
62 | 
63 |          return h;
64 |          }
65 |    };
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/king/VCLinear.h:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////
 2 | // VCLINEAR.h
 3 | // Author: Wei-Min Chen
 4 | // May 13, 2005
 5 | 
 6 | #ifndef _VC_LINEAR_H_
 7 | #define _VC_LINEAR_H_
 8 | 
 9 | #include "Pedigree.h"
10 | #include "IntArray.h"
11 | #include "MathMatrix.h"
12 | #include "MathVector.h"
13 | #include "MathCholesky.h"
14 | #include "VCGEE.h"
15 | 
16 | class GEEVC_LINEAR:public GEE{
17 | protected:
18 |    virtual void RefreshOD(int f);
19 | public:
20 |    double H2;
21 |    double seH2;
22 |    double totalVariance;
23 |    double stat;
24 |    double LOD;
25 |    double pvalue;
26 |    IntArray personValid;
27 | 
28 |    Matrix * varianceComponents;
29 |    Matrix PhiX;
30 |    Matrix PhiM;
31 |    GEEVC_LINEAR(Pedigree & pedigree);
32 |    ~GEEVC_LINEAR();
33 | 
34 |    void init();
35 |    void InitCoef();
36 |    virtual void summary();
37 |    void print();
38 |    double residual(int p);
39 | 
40 |    int trait;
41 |    IntArray mCovariate;
42 | };
43 | 
44 | class POLY:public GEEVC_LINEAR{
45 | protected:
46 |    void RefreshO(int f);
47 |    void RefreshOD(int f);
48 |    int StopRule();
49 | public:
50 |    void InitCoef();
51 |    POLY(Pedigree & pedigree):GEEVC_LINEAR(pedigree){}
52 |    ~POLY(){}
53 | };
54 | 
55 | class GEEVC_LINKAGE:public GEEVC_LINEAR{
56 | protected:
57 |    void RefreshO(int f);
58 | public:
59 |    double h2;
60 |    Vector *ibd;
61 |    void InitCoef();
62 |    void summary();
63 |    GEEVC_LINKAGE(Pedigree & pedigree):GEEVC_LINEAR(pedigree){ibd=NULL;}
64 |    ~GEEVC_LINKAGE(){if(ibd) delete []ibd;}
65 | };
66 | 
67 | class GEEVC_ASSOC:public GEEVC_LINKAGE{
68 | public:
69 |    Vector IBS;
70 |    void InitCoef();
71 |    void summary();
72 |    GEEVC_ASSOC(Pedigree & pedigree):GEEVC_LINKAGE(pedigree){/*IBS=new Vector[ped.familyCount];*/}
73 |    ~GEEVC_ASSOC(){/*if(IBS) delete []IBS;*/}
74 | };
75 | 
76 | #endif
77 | 


--------------------------------------------------------------------------------
/king/Genetics.cpp:
--------------------------------------------------------------------------------
 1 | #include "Genetics.h"
 2 | #include "Error.h"
 3 | 
 4 | #include <stdio.h>
 5 | 
 6 | void ImprintingParameter::Status()
 7 |    {
 8 |    char * msg;
 9 | 
10 |    switch (* (int *) var)
11 |       {
12 |       case I_NONE: msg = "NOT MODELLED"; break;
13 |       case I_MATERNAL: msg = "MATERNAL"; break;
14 |       case I_PATERNAL: msg = "PATERNAL"; break;
15 |       case I_FULL: msg = "FULLY MODELLED"; break;
16 |       case I_IMPRINTING: msg = "TEST IMPRINTING"; break;
17 |       }
18 | 
19 |    printf("%30s : %15s (-%c[+|-|f|i|m|p])\n", description, msg, ch);
20 |    }
21 | 
22 | void ImprintingParameter::Translate(char * value)
23 |    {
24 |    switch (tolower(*value))
25 |       {
26 |       case '-' : * (int *) var = I_NONE; break;
27 |       case 'm' : * (int *) var = I_MATERNAL; break;
28 |       case 'p' : * (int *) var = I_PATERNAL; break;
29 |       case 'f' :
30 |       case '+' : 
31 |       case  0  : * (int *) var = I_FULL; break;
32 |       case 'i' : * (int *) var = I_IMPRINTING; break;
33 |       default  : warning("unknown parameter %c%s\n", ch, value);
34 |       };
35 |    }
36 | 
37 | void GeneticModelParameter::Status()
38 |    {
39 |    char * msg;
40 | 
41 |    switch (* (int *) var)
42 |       {
43 |       case GM_FREE: msg = "FREE"; break;
44 |       case GM_RECESSIVE: msg = "RECESSIVE"; break;
45 |       case GM_ADDITIVE: msg = "ADDITIVE"; break;
46 |       case GM_DOMINANT: msg = "DOMINANT"; break;
47 |       }
48 | 
49 |    printf("%30s : %15s (-%c[a|d|f|r])\n", description, msg, ch);
50 |    }
51 | 
52 | void GeneticModelParameter::Translate(char * value)
53 |    {
54 |    switch (tolower(*value))
55 |       {
56 |       case 'a' : * (int *) var = GM_ADDITIVE; break;
57 |       case 'd' : * (int *) var = GM_DOMINANT; break;
58 |       case 'f' : * (int *) var = GM_FREE; break;
59 |       case 'r' : * (int *) var = GM_RECESSIVE; break;
60 |       default  : warning("unknown parameter %c%s\n", ch, value);
61 |       };
62 |    }
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/king/Kinship.cpp:
--------------------------------------------------------------------------------
 1 | #include "Kinship.h"
 2 | 
 3 | #define MAX_TABLE    850
 4 | 
 5 | void Kinship::Setup(Family & f)
 6 |    {
 7 |    int count    = f.count > MAX_TABLE    ? MAX_TABLE : f.count;
 8 |    int founders = f.founders > MAX_TABLE ? MAX_TABLE : f.founders;
 9 | 
10 |    allPairs.Dimension(count, count);
11 | 
12 |    for (int i = 0; i < founders; i++)
13 |       {
14 |       for (int j = 0; j < founders; j++)
15 |          allPairs[i][j] = 0.0;
16 |       allPairs[i][i] = 0.5;
17 |       }
18 | 
19 |    for (int i = founders; i < count; i++)
20 |       {
21 |       Person * p = &(f.ped[f.path[i]]);
22 |       int k = p->father->traverse;
23 |       int l = p->mother->traverse;
24 | 
25 |       for (int j = 0; j < i; j++)
26 |          if (!p->isMzTwin(f.ped[f.path[j]]))
27 |             allPairs[i][j] = allPairs[j][i] =
28 |                (allPairs[k][j] + allPairs[l][j]) * 0.5;
29 |          else
30 |             allPairs[j][i] = allPairs[i][j] = 0.5 + allPairs[k][l] * 0.5;
31 | 
32 |       allPairs[i][i] = 0.5 + allPairs[k][l] * 0.5;
33 |       }
34 | 
35 |    fam = &f;
36 |    }
37 | 
38 | double Kinship::operator() (Person & p1, Person & p2)
39 |    {
40 |    int i = p1.traverse;
41 |    int j = p2.traverse;
42 | 
43 |    if (i >= MAX_TABLE || j >= MAX_TABLE)
44 |       {
45 |       if (p1.isFounder() && p2.isFounder())
46 |          return 0.0;
47 | 
48 |       if (i == j || p1.isMzTwin(p2))
49 |          return 0.5 + (*this)(*p1.father, *p1.mother) * 0.5;
50 | 
51 |       if (i < j)
52 |          return 0.5 * ((*this)(*p2.father, p1) + (*this)(*p2.mother, p1));
53 |       else
54 |          return 0.5 * ((*this)(*p1.father, p2) + (*this)(*p1.mother, p2));
55 |       }
56 | 
57 |    return allPairs[i][j];
58 |    }
59 | 
60 | bool Kinship::isInbred()
61 |    {
62 |    for (int i=0; i < allPairs.rows; i++)
63 |       if (allPairs[i][i] != 0.5)
64 |          return true;
65 | 
66 |    for (int i=allPairs.rows; i < fam->count; i++)
67 |       if ((*this)(fam->ped[fam->path[i]], fam->ped[fam->path[i]]) != 0.5)
68 |          return true;
69 | 
70 |    return false;
71 |    }
72 | 
73 | 
74 | 
75 | 


--------------------------------------------------------------------------------
/king/MathStats.h:
--------------------------------------------------------------------------------
 1 | #ifndef _MATHSTATS_H_
 2 | #define _MATHSTATS_H_
 3 | 
 4 | #include "MathVector.h"
 5 | #include "MathMatrix.h"
 6 | 
 7 | // Normal distribution functions
 8 | //
 9 | double ndist (double x, bool upper = true);
10 | 
11 | // ninv(p) calculates X such that p = P(x >= X) for std normal dist
12 | //
13 | double ninv ( double p );
14 | 
15 | // Chi-Sq distribution function
16 | // P(Chi>=X) for v degrees of freedom
17 | //
18 | double chidist(double x, double v);
19 | double chidist(double x, double v, double ncp);
20 | 
21 | // F distribution function
22 | // P(F>=x) for v1 and v2 degrees freedom
23 | //
24 | double fdist(double x, double v1, double v2);
25 | 
26 | // P(T>=x) for v degrees freedom
27 | double tdist(double x, double v);
28 | 
29 | // Gamma distribution utility functions
30 | // (required for the chi-sq distribution)
31 | //
32 | 
33 | double erff (double x);             // the error function
34 | double erffc(double x);             // the complementary error function
35 | double erfcc(double x);             // heuristic version of erffc
36 | double gammln ( double xx );        // return the value of ln ( gamma ( xx ) ) | xx > 0
37 | double gammp ( double a, double x);    // return the incomplete gamma function P(a,x)
38 | double gammq ( double a, double x);    // return the incomplete gamma function Q(a,x) = 1 - P(a,x)
39 | 
40 | // Estimates P(a,x) by its series representation and gammln(a)
41 | void gser ( double * gamser, double a, double x, double * gln);
42 | // Estimates Q(a,x) by its continued fraction representation and gammln(a)
43 | void gcf ( double * gammcf, double a, double x, double * gln);
44 | 
45 | // Beta distribution utility functions
46 | //
47 | double betai(double a, double b, double x);     // Returns the incomplete
48 |                                                 // beta function Ix(a,b)
49 | double betacf(double a, double b, double x);    // Evaluates continued fraction
50 |                                                 // for incomplete beta function
51 |                                                 // by modified Lentz's method
52 | 
53 | // Rapid approximation to the sqrt for integers
54 | //
55 | 
56 | int introot(int n);
57 | 
58 | #endif
59 | 
60 | 


--------------------------------------------------------------------------------
/king/PedigreeFamily.h:
--------------------------------------------------------------------------------
 1 | ////////////////////////////////////////////////////////////////////// 
 2 | // libsrc/PedigreeFamily.h 
 3 | // (c) 2000-2007 Goncalo Abecasis
 4 | // 
 5 | // This file is distributed as part of the MERLIN source code package   
 6 | // and may not be redistributed in any form, without prior written    
 7 | // permission from the author. Permission is granted for you to       
 8 | // modify this file for your own personal use, but modified versions  
 9 | // must retain this copyright notice and must not be distributed.     
10 | // 
11 | // Permission is granted for you to use this file to compile MERLIN.    
12 | // 
13 | // All computer programs have bugs. Use this file at your own risk.   
14 | // 
15 | // Tuesday December 18, 2007
16 | // 
17 |  
18 | #ifndef __PEDFAMILY_H__
19 | #define __PEDFAMILY_H__
20 | 
21 | #include "PedigreeAlleles.h"
22 | #include "PedigreePerson.h"
23 | #include "StringBasics.h"
24 | 
25 | class Pedigree;
26 | 
27 | class Family
28 |    {
29 |    public:
30 |       Pedigree & ped;
31 |       String   famid;
32 |       int      serial;
33 |       int      first, last;    // sentinel family members
34 |       int      count;          // number of individuals in pedigree
35 |       int      founders;       // number of founders in pedigree
36 |       int      nonFounders;    // number of non-founders in pedigree
37 |       int      mzTwins;        // number of MZ twins, excluding 1st twin in set
38 |       int      * path;         // traverses the pedigree so that ancestors
39 |                                // preceed their descendants
40 | 
41 |       int      generations;    // Rough classification as:
42 |                                //  1 -- all individuals are unrelated
43 |                                //  2 -- two generations (inc. multiple couples)
44 |                                //  3 -- three or more generations
45 | 
46 |       bool   isNuclear()
47 |          { return (generations == 2) && (founders == 2); }
48 | 
49 |       Family(Pedigree & ped, int top, int bottom, int serial = 0);
50 |       ~Family();
51 | 
52 |       int  ConnectedGroups(IntArray * groupMembership = NULL);
53 | 
54 |    private:
55 |       void ShowInvalidCycles();
56 | 
57 |      Family & operator = (Family & rhs);
58 | //      void Mark(int who, int group, IntArray * stack, IntArray & group_id );
59 |    };
60 | 
61 | #endif
62 | 
63 |  
64 | 


--------------------------------------------------------------------------------
/king/PedigreeDescription.h:
--------------------------------------------------------------------------------
 1 | ////////////////////////////////////////////////////////////////////// 
 2 | // libsrc/PedigreeDescription.h 
 3 | // (c) 2000-2007 Goncalo Abecasis
 4 | // 
 5 | // This file is distributed as part of the MERLIN source code package   
 6 | // and may not be redistributed in any form, without prior written    
 7 | // permission from the author. Permission is granted for you to       
 8 | // modify this file for your own personal use, but modified versions  
 9 | // must retain this copyright notice and must not be distributed.     
10 | // 
11 | // Permission is granted for you to use this file to compile MERLIN.    
12 | // 
13 | // All computer programs have bugs. Use this file at your own risk.   
14 | // 
15 | // Tuesday December 18, 2007
16 | // 
17 |  
18 | #ifndef __PEDDESCRIBE_H__
19 | #define __PEDDESCRIBE_H__
20 | 
21 | #include "PedigreeGlobals.h"
22 | #include "PedigreePerson.h"
23 | #include "StringArray.h"
24 | #include "IntArray.h"
25 | 
26 | #include <stdio.h>
27 | 
// Integer codes naming the kinds of column a pedigree file may contain.
// NOTE(review): presumably these are the values stored in
// PedigreeDescription::columns -- confirm against the implementation file.
28 | // Possible pedigree columns
29 | #define  pcSkip      0
30 | #define  pcMarker    1
31 | #define  pcTrait     2
32 | #define  pcAffection 3
33 | #define  pcCovariate 4
34 | #define  pcZygosity  5
35 | #define  pcEnd       6
36 | 
37 | // Undocumented pedigree column types -- not recommended
38 | #define  pcUndocumentedTraitCovariate   1001  
39 | 
// Description of the columns found in a pedigree file, together with the
// loaders that populate it. Every member function is only declared here;
// the definitions live outside this header.
40 | class PedigreeDescription : public PedigreeGlobals
41 |    {
42 |    public:
43 |       int      columnCount;
44 |       IntArray columns, columnHash;   // NOTE(review): presumably one pc* code per column -- confirm
45 | 
46 |       PedigreeDescription();
47 |       ~PedigreeDescription();
48 | 
49 |       // Each loader comes in two forms: one reading from an already opened
49 |       // IFILE and one taking a file name. warnIfLinkage defaults to false.
49 |       void Load(IFILE & Input, bool warnIfLinkage = false);
50 |       void Load(const char * filename, bool warnIfLinkage = false);
51 | 
52 |       void LoadLinkageDataFile(IFILE & input);
53 |       void LoadLinkageDataFile(const char * filename);
54 | 
55 |       void LoadMendelDataFile(IFILE & input);
56 |       void LoadMendelDataFile(const char * filename);
57 | 
58 |       void LoadMap(IFILE & Input);
59 |       void LoadMap(const char * filename);
60 | 
61 |       PedigreeDescription & operator = (PedigreeDescription & rhs);   // note: rhs is a non-const reference
62 | 
63 |       int CountTextColumns();
64 | 
65 |       // Returns a string summarizing the column contents
66 |       const char * ColumnSummary(String & string);
67 | 
68 |       // Flag specifying Mendel format
69 |       bool mendelFormat;
70 | 
71 |       String filename;
72 | 
73 |    private:
74 |       int ReadLineHelper(IFILE & input, String & buffer, StringArray & tokens);
75 | 
76 |       int CountColumns(int type);
77 |       void UpdateSummary(String & string, int type, const char * label);
78 |    };
79 | 
80 | #endif
81 | 
82 |  
83 | 


--------------------------------------------------------------------------------
/king/VCGEE.h:
--------------------------------------------------------------------------------
 1 | //////////////////////////////////////////////////////////////////////
 2 | // GEE.h
 3 | // Author: Wei-Min Chen
 4 | // Oct 10, 2004
 5 | 
 6 | #ifndef __VCGEE_H__
 7 | #define __VCGEE_H__
 8 | 
 9 | #include "Pedigree.h"
10 | #include "IntArray.h"
11 | #include "MathMatrix.h"
12 | #include "MathVector.h"
13 | #include "MathCholesky.h"
14 | 
// State and solver interface for a GEE fit over a pedigree.
// RefreshO, RefreshOD, InitCoef, summary and print are virtual hooks with
// empty default bodies; GetPhi, constraint, StopRule, solve and Refresh are
// only declared here and are defined outside this header.
15 | class GEE{
16 | protected:
17 |    Vector delta;
18 |    Vector delta2;
19 |    Matrix W2, B[5];
20 | 
21 |    virtual void GetPhi(int f);
22 | 
23 |    Vector SEvariances, SEvariances_R;
24 | 
25 |    Matrix DVD;
26 |    Vector SEcoef, SEcoef_R;
27 |    Matrix CovCoef;
28 |    Matrix CovCoef_R;
29 |    Cholesky chol;
30 |    Matrix D;            // D for GEE
31 |    Matrix Omega;        // variance-covariance matrix of trait
32 |    Matrix OmegaInv;
33 |    Matrix *OD;
34 |    Matrix Phi;
35 |    Matrix Delta;
36 |    int parCount;        // number of variance components
37 |    int coefCount;       // number of regression coefficients
38 |    int size;            // size of score
39 | 
   // Maps an unordered pair (u, v) to a single index: u itself when u == v,
   // otherwise size + hi*(hi-1)/2 + lo, where hi/lo are the larger/smaller
   // of the two, so Index(u, v) == Index(v, u).
40 |    inline int Index(int u, int v){
41 |       if(u==v) return u;
42 |       else if(u<v) return size + v*(v-1)/2 + u;
43 |       else return size + u*(u-1)/2 + v;
44 |    }
45 |    virtual void RefreshO(int f){}    // no-op by default
46 |    virtual void RefreshOD(int f){}   // no-op by default
47 |    void Refresh(int f);
48 |    virtual void InitCoef(){}         // no-op by default
49 |    virtual void summary(){}          // no-op by default
50 |    virtual int constraint();
51 |    virtual int StopRule();
52 | public:
53 |    Matrix covariance;   // variance-covariance matrix of vc
54 |    Matrix covariance_R;
55 | 
56 |    FILE *polyfp;
57 |    String prefix;
58 |    bool moreFlag;
59 |    bool polyFlag;
60 |    bool saturatedMean;
61 |    Vector meanPerFamily;
62 |    int ValidFamilies;
63 |    int ValidPersons;
64 |    IntArray isNuclear;
65 |    IntArray nuclearP1;
66 |    IntArray nuclearP2;
67 | 
68 |    Vector *traits;
69 |    Matrix *covariates;
70 | 
71 |    double deltaScale;
72 |    int AtBorder;
73 |    IntArray borderIndex;
74 |    Matrix newDVD;
75 | 
76 |    Vector coef;         // regression coefficients
77 |    Vector variances;    // variance components
78 |    IntArray * pheno;
79 | 
80 |    int LoopCount;
81 |    double Epsilon;
82 |    double loglik;
83 | 
84 | //   inline Matrix OmegaInverse(Matrix & M);
85 | //   Matrix BlockInverse(Matrix & M, int blockCount, int extra);
86 | //   Block2Inverse(Matrix & M);
87 | 
88 | //   Matrix sProductPhi(Matrix & M);
89 |    Pedigree & ped;      // reference member: must be bound by the constructor
90 |    GEE(Pedigree & pedigree);
91 |    virtual void print(){}            // no-op by default
92 |    virtual void solve();
93 |    virtual ~GEE();
94 | };
95 | 
96 | 
97 | #endif
98 | 


--------------------------------------------------------------------------------
/king/IBD.h:
--------------------------------------------------------------------------------
  1 | #ifndef __IBD_H__
  2 | #define __IBD_H__
  3 | 
  4 | #include "Pedigree.h"
  5 | 
  6 | #include <stdio.h>
  7 | 
  8 | class IBD
  9 |    {
 10 |    public:
 11 |       double p0, p1, p2;
 12 | 
 13 |    IBD()
 14 |       { p0 = p1 = p2 = 0.0; }
 15 |    IBD(double zero, double one, double two)
 16 |       { p0 = zero; p1 = one; p2 = two; }
 17 | 
 18 |    void defaultSib()
 19 |       { p0 = p2 = 0.25; p1 = 0.5; }
 20 | 
 21 |    void defaultSelf()
 22 |       { p0 = p1 = 0; p2 = 1.0; }
 23 | 
 24 |    void defaultUnrelated()
 25 |       { p0 = 1.0; p1 = p2 = 0.0; }
 26 | 
 27 |    void defaultFounderOffspring()
 28 |       { p0 = p2 = 0.0; p1 = 1.0; }
 29 | 
 30 |    double expected()
 31 |       { return 0.5 * p1 + p2; }
 32 | 
 33 |    bool isValid()
 34 |       { return (p0 + p1 + p2) == 1.0; }
 35 | 
 36 |    IBD & operator = (IBD & rhs)
 37 |       { p0 = rhs.p0;
 38 |         p1 = rhs.p1;
 39 |         p2 = rhs.p2;
 40 |         return (*this); }
 41 | 
 42 |    bool operator == (IBD & rhs);
 43 |    bool operator != (IBD & rhs);
 44 | 
 45 |    IBD * SimpleIBD(int marker, Person & p1, Person & p2);
 46 |    };
 47 | 
 // Key for a pair of people, held as two serial numbers.
 // NOTE(review): the names suggest serialLo <= serialHi after SelectPair --
 // confirm against the definition of SelectPair.
 48 | struct IBDKey
 49 |    {
 50 |    int serialLo;
 51 |    int serialHi;
 52 | 
 53 |    void SelectPair(Person & p1, Person & p2);   // defined outside this header
 54 |    };
 55 | 
 56 | struct IBDPair
 57 |    {
 58 |    int   serialLo;
 59 |    int   serialHi;
 60 |    IBD   ibd;
 61 | 
 62 |    void Assign(IBDKey & key, IBD & i)
 63 |       {
 64 |       serialLo = key.serialLo;
 65 |       serialHi = key.serialHi;
 66 |       ibd = i;
 67 |       }
 68 |    };
 69 | 
 // A list of IBDPair records with lookup, append and sort operations.
 // All member functions are defined outside this header.
 // NOTE(review): size/count presumably track allocated capacity and entries
 // in use, with the private Grow() enlarging `list` -- confirm.
 70 | class IBDList
 71 |    {
 72 |    public:
 73 |       IBDPair * list;
 74 |       int       size, count;
 75 | 
 76 |       IBDList();
 77 |       ~IBDList();
 78 | 
 79 |       IBD * Lookup(Person & p1, Person & p2);
 80 |       void  Append(Person & p1, Person & p2, IBD & ibd);
 81 |       void  Sort(Pedigree & ped);
 82 |       bool  IsRangeEmpty(int low, int high);
 83 | 
 84 |    private:
 85 |       void Grow();
 86 |    };
 87 | 
 88 | class IBDTable
 89 |    {
 90 |    public:
 91 |       IBDList * markers;
 92 | 
 93 |       IBDTable();
 94 |       ~IBDTable();
 95 | 
 96 |       void Load(Pedigree & ped, FILE * f);
 97 |       void Load(Pedigree & ped, const char * filename);
 98 |       void Load(Pedigree & ped, const char * filename, Vector & LocusMap);
 99 | 
100 |       IBD * Lookup(int marker, Person & p1, Person & p2);
101 | 
102 |       bool  HaveFamily(int marker, Family * f);
103 | 
104 |       bool isEmpty()
105 |          { return markers == NULL; }
106 |    };
107 | 
108 | 
109 | #endif
110 | 


--------------------------------------------------------------------------------
/king/MathSobol.cpp:
--------------------------------------------------------------------------------
 1 | #include "MathSobol.h"
 2 | #include "Random.h"
 3 | #include "Error.h"
 4 | 
 5 | #include "stdlib.h"
 6 | 
 // Per-dimension tables read by SobolSequence::Init. For dimension k,
 // poly_degrees[k] is the number of leading direction values that are drawn
 // at random, and poly_integers[k] is the bit pattern that selects which
 // earlier values are XOR-ed together by the recurrence.
  7 | int SobolSequence::poly_degrees[POLY_COUNT] =
  8 |    { 1,  2,  3,  3,  4,  4,  5,  5,  5,  5,  5,  5,
  9 |      6,  6,  6,  6,  6,  6,  7,  7,  7,  7,  7,  7,
 10 |      7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7
 11 |    };
 12 | 
 13 | int SobolSequence::poly_integers[POLY_COUNT] =
 14 |    {  0, 1,   1,  2,  1,  4,  2,  4,  7, 11, 13, 14,
 15 |       1, 13, 16, 19, 22, 25,  1,  4,  7,  8, 14, 19,
 16 |      21, 28, 31, 32, 37, 41, 42, 50, 55, 56, 59, 62
 17 |    };
18 | 
19 | SobolSequence::SobolSequence()
20 |    {
21 |    bits = NULL;
22 |    }
23 | 
24 | SobolSequence::~SobolSequence()
25 |    {
26 |    if (bits != NULL) delete [] bits;
27 |    }
28 | 
29 | void SobolSequence::Init(int dimensions)
30 |    {
31 |    if (dimensions > POLY_COUNT)
32 |       numerror("Sobol sequences of > %d dimensions not supported", POLY_COUNT);
33 | 
34 |    x.Dimension(dim = dimensions);
35 |    x.Set(0);
36 |    bits = new IntArray[SOBOL_BITS];
37 | 
38 |    for (int i = 0; i < SOBOL_BITS; i++)
39 |       bits[i].Dimension(dim);
40 | 
41 |    unsigned long seed = 0;
42 | 
43 |    for (int k = 0; k < dim; k++)
44 |       {
45 |       int degrees = poly_degrees[k];
46 | 
47 |       for (int j = 0; j < degrees; j++)
48 |          // initialize the 0 to kth bit as random odd number <= 2^j - 1
49 |          // and apply a left shift by SOBOL_BITS - j - 1
50 |          {
51 |          bits[j][k] = (RAND(seed) % (1 << j) * 2 | 1);
52 |          bits[j][k] <<= (SOBOL_BITS - j - 1);
53 |          }
54 | 
55 |       for (int j = degrees; j < SOBOL_BITS; j++)
56 |          // Fill in the remaining values using recurrence
57 |          {
58 |          long poly = poly_integers[k];
59 | 
60 |          long i = bits[j - degrees][k];
61 |          i ^= (i >> poly_degrees[k]);
62 | 
63 |          for (int l = j - degrees + 1; l < j; l++)
64 |             {
65 |             if (poly & 1) i ^= bits[l][k];
66 |             poly >>= 1;
67 |             }
68 | 
69 |          bits[j][k] = i;
70 |          }
71 |       }
72 |    counter = 0;
73 |    }
74 | 
75 | Vector & SobolSequence::Next(Vector & point)
76 |    {
77 |    long  i = counter, bit;
78 | 
79 |    for (bit = 0; bit < SOBOL_BITS; bit++)
80 |       {
81 |       if (!(i & 1)) break;
82 |       i >>= 1;
83 |       }
84 | 
85 |    if (bit == SOBOL_BITS) numerror("SobolSequence is too short");
86 | 
87 |    for (int k = 0; k < dim; k++)
88 |       {
89 |       x[k] ^= bits[bit][k];
90 |       point[k] = x[k] * SOBOL_FACTOR;
91 |       }
92 | 
93 |    counter++;
94 | 
95 |    return point;
96 |    }
97 | 
98 | 
99 | 


--------------------------------------------------------------------------------
/scripts/run-paste-genotype-local.cmd:
--------------------------------------------------------------------------------
 # Header directives below are read by the pipeline runner
 # NOTE(review) the exact directive grammar is not visible here, so confirm that free-form comment lines are ignored by the runner
 1 | # out : log/paste-geno
 2 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.1Mb.txt
 3 | # var : ROOT : ..
 4 | # var : PREFIX : $INTERVAL$1$/merged.$INTERVAL$1$_$INTERVAL$4$_$INTERVAL$5$
 5 | # name : example-paste-genotype
 6 | # target : out/genotypes/merged/$PREFIX$.genotypes.bcf out/genotypes/merged/$PREFIX$.genotypes.bcf.csi out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf.csi out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf.csi out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf.csi
 # Create one output directory per product, keyed by the first INTERVAL field
 7 | mkdir -p out/genotypes/merged/$INTERVAL$1$/
 8 | mkdir -p out/genotypes/minDP0/$INTERVAL$1$/
 9 | mkdir -p out/genotypes/minDP10/$INTERVAL$1$/
10 | mkdir -p out/genotypes/hgdp/$INTERVAL$1$/
 # Build the sex map (columns 1 and 20 of the index, header row dropped) and the list of per-batch BCF files
11 | cut -f 1,20 out/index/list.107.local.crams.vb_xy.index | tail -n +2 > out/genotypes/merged/$PREFIX$.sex_map.txt
12 | cat index/seq.batches.by.20.txt | xargs -I {} echo 'out/genotypes/batches/{}/b{}.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$.genotypes.bcf' > out/genotypes/merged/$PREFIX$.bcflist.txt
 # Paste the per-batch calls into one merged BCF and index it
13 | $ROOT$/cramore/cramore vcf-paste-calls --vcf-list out/genotypes/merged/$PREFIX$.bcflist.txt --num-pc 0 --sex-map out/genotypes/merged/$PREFIX$.sex_map.txt --xLabel chrX --yLabel chrY --mtLabel chrM --xStart 2781479 --xStop 155701383 --skip-tmp-info --region $INTERVAL$1$:$INTERVAL$4$-$INTERVAL$5$ --out out/genotypes/merged/$PREFIX$.genotypes.bcf > out/genotypes/merged/$PREFIX$.genotypes.bcf.out 2> out/genotypes/merged/$PREFIX$.genotypes.bcf.err
14 | $ROOT$/bcftools/bcftools index -f out/genotypes/merged/$PREFIX$.genotypes.bcf
 # Squeeze the merged BCF at minDP 0 and minDP 10, indexing each result
15 | $ROOT$/cramore/cramore vcf-squeeze --in out/genotypes/merged/$PREFIX$.genotypes.bcf --minDP 0 --out out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf > out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf.out 2> out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf.err
16 | $ROOT$/bcftools/bcftools index -f out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf
17 | $ROOT$/cramore/cramore vcf-squeeze --in out/genotypes/merged/$PREFIX$.genotypes.bcf --minDP 10 --out out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf > out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf.out 2> out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf.err
18 | $ROOT$/bcftools/bcftools index -f out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf
 # Extract the HGDP sites from the minDP0 output and index the result
19 | $ROOT$/cramore/cramore vcf-extract --vcf out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf --site resources/ref/HGDP_938.hg38.sites.vcf.gz --out out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf > out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf.out 2>  out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf.err
20 | $ROOT$/bcftools/bcftools index -f out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf
21 | 


--------------------------------------------------------------------------------
/king/PeelerNodes.cpp:
--------------------------------------------------------------------------------
 1 | #include "PeelerNodes.h"
 2 | 
 3 | Vector PeelerNode::scratch;
 4 | 
 5 | PeelerNode::~PeelerNode()
 6 |    {
 7 |    }
 8 | 
 9 | void PersonNode::PeelAncestors(MatingNode * mating, double (* trans)(int, int, int))
10 |    {
11 |    for (int j = 0; j < states.Length(); j++)
12 |       {
13 |       double p = 0.0;
14 | 
15 |       for (int i = 0; i < mating->mstates.Length(); i++)
16 |          p += trans(mating->father->states[mating->pstates[i]],
17 |                     mating->mother->states[mating->mstates[i]],
18 |                     states[j]) * mating->probabilities[i];
19 | 
20 |       probabilities[j] *= p;
21 |       }
22 |    }
23 | 
24 | void PersonNode::PeelDescendants(MatingNode * mating, double (* trans)(int, int, int))
25 |    {
26 |    scratch.Dimension(states.Length());
27 |    scratch.Zero();
28 | 
29 |    IntArray & index = person->sex == SEX_MALE ? mating->pstates : mating->mstates;
30 | 
31 |    for (int i = 0; i < index.Length(); i++)
32 |       scratch[index[i]] += mating->probabilities[i];
33 | 
34 |    for (int j = 0; j < states.Length(); j++)
35 |       probabilities[j] *= scratch[j];
36 |    }
37 | 
38 | void PersonNode::Clear()
39 |    {
40 |    states.Clear();
41 |    probabilities.Clear();
42 |    }
43 | 
44 | void MatingNode::PeelFather()
45 |    {
46 |    for (int j = 0; j < pstates.Length(); j++)
47 |       probabilities[j] *= father->probabilities[pstates[j]];
48 |    }
49 | 
50 | void MatingNode::PeelMother()
51 |    {
52 |    for (int j = 0; j < mstates.Length(); j++)
53 |       probabilities[j] *= mother->probabilities[mstates[j]];
54 |    }
55 | 
56 | void MatingNode::PeelOffspring(PersonNode * child, double (*trans) (int, int, int))
57 |    {
58 |    for (int i = pstates.Length() - 1; i >= 0; i--)
59 |       {
60 |       double p = 0.0;
61 | 
62 |       for (int j = 0; j < child->states.Length(); j++)
63 |          p += trans(pstates[i], mstates[i], child->states[j]) *
64 |               child->probabilities[j];
65 | 
66 |       if (p > 0.0)
67 |          probabilities[i] *= p;
68 |       else
69 |          mstates.Delete(i),
70 |          pstates.Delete(i),
71 |          probabilities.Delete(i);
72 |       }
73 |    }
74 | 
75 | void MatingNode::Initialize(PersonNode * father, PersonNode * mother)
76 |    {
77 |    mstates.Dimension(father->states.Length() * mother->states.Length());
78 |    pstates.Dimension(mstates.Length());
79 | 
80 |    probabilities.Dimension(mstates.Length());
81 |    probabilities.Set(1.0);
82 | 
83 |    for (int i = 0; i < father->states.Length(); i++)
84 |       for (int j = 0; j < mother->states.Length(); j++)
85 |          pstates[i] = father->states[i],
86 |          mstates[j] = mother->states[j];
87 |    }
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/libsvm/tools/easy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import os
 5 | from subprocess import *
 6 | 
 7 | if len(sys.argv) <= 1:
 8 | 	print('Usage: {0} training_file [testing_file]'.format(sys.argv[0]))
 9 | 	raise SystemExit
10 | 
11 | # svm, grid, and gnuplot executable files
12 | 
13 | is_win32 = (sys.platform == 'win32')
14 | if not is_win32:
15 | 	svmscale_exe = "../svm-scale"
16 | 	svmtrain_exe = "../svm-train"
17 | 	svmpredict_exe = "../svm-predict"
18 | 	grid_py = "./grid.py"
19 | 	gnuplot_exe = "/usr/bin/gnuplot"
20 | else:
21 |         # example for windows
22 | 	svmscale_exe = r"..\windows\svm-scale.exe"
23 | 	svmtrain_exe = r"..\windows\svm-train.exe"
24 | 	svmpredict_exe = r"..\windows\svm-predict.exe"
25 | 	gnuplot_exe = r"c:\tmp\gnuplot\bin\pgnuplot.exe"
26 | 	grid_py = r".\grid.py"
27 | 
28 | assert os.path.exists(svmscale_exe),"svm-scale executable not found"
29 | assert os.path.exists(svmtrain_exe),"svm-train executable not found"
30 | assert os.path.exists(svmpredict_exe),"svm-predict executable not found"
31 | assert os.path.exists(gnuplot_exe),"gnuplot executable not found"
32 | assert os.path.exists(grid_py),"grid.py not found"
33 | 
34 | train_pathname = sys.argv[1]
35 | assert os.path.exists(train_pathname),"training file not found"
36 | file_name = os.path.split(train_pathname)[1]
37 | scaled_file = file_name + ".scale"
38 | model_file = file_name + ".model"
39 | range_file = file_name + ".range"
40 | 
41 | if len(sys.argv) > 2:
42 | 	test_pathname = sys.argv[2]
43 | 	file_name = os.path.split(test_pathname)[1]
44 | 	assert os.path.exists(test_pathname),"testing file not found"
45 | 	scaled_test_file = file_name + ".scale"
46 | 	predict_test_file = file_name + ".predict"
47 | 
48 | cmd = '{0} -s "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, train_pathname, scaled_file)
49 | print('Scaling training data...')
50 | Popen(cmd, shell = True, stdout = PIPE).communicate()	
51 | 
52 | cmd = '{0} -svmtrain "{1}" -gnuplot "{2}" "{3}"'.format(grid_py, svmtrain_exe, gnuplot_exe, scaled_file)
53 | print('Cross validation...')
54 | f = Popen(cmd, shell = True, stdout = PIPE).stdout
55 | 
56 | line = ''
57 | while True:
58 | 	last_line = line
59 | 	line = f.readline()
60 | 	if not line: break
61 | c,g,rate = map(float,last_line.split())
62 | 
63 | print('Best c={0}, g={1} CV rate={2}'.format(c,g,rate))
64 | 
65 | cmd = '{0} -c {1} -g {2} "{3}" "{4}"'.format(svmtrain_exe,c,g,scaled_file,model_file)
66 | print('Training...')
67 | Popen(cmd, shell = True, stdout = PIPE).communicate()
68 | 
69 | print('Output model: {0}'.format(model_file))
70 | if len(sys.argv) > 2:
71 | 	cmd = '{0} -r "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, test_pathname, scaled_test_file)
72 | 	print('Scaling testing data...')
73 | 	Popen(cmd, shell = True, stdout = PIPE).communicate()	
74 | 
75 | 	cmd = '{0} "{1}" "{2}" "{3}"'.format(svmpredict_exe, scaled_test_file, model_file, predict_test_file)
76 | 	print('Testing...')
77 | 	Popen(cmd, shell = True).communicate()	
78 | 
79 | 	print('Output prediction: {0}'.format(predict_test_file))
80 | 


--------------------------------------------------------------------------------
/libsvm/tools/checkdata.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | #
  4 | # A format checker for LIBSVM
  5 | #
  6 | 
  7 | #
  8 | # Copyright (c) 2007, Rong-En Fan
  9 | #
 10 | # All rights reserved.
 11 | #
 12 | # This program is distributed under the same license of the LIBSVM package.
 13 | # 
 14 | 
 15 | from sys import argv, exit
 16 | import os.path
 17 | 
 18 | def err(line_no, msg):
 19 | 	print("line {0}: {1}".format(line_no, msg))
 20 | 
 21 | # works like float() but does not accept nan and inf
 22 | def my_float(x):
 23 | 	if x.lower().find("nan") != -1 or x.lower().find("inf") != -1:
 24 | 		raise ValueError
 25 | 
 26 | 	return float(x)
 27 | 
 28 | def main():
 29 | 	if len(argv) != 2:
 30 | 		print("Usage: {0} dataset".format(argv[0]))
 31 | 		exit(1)
 32 | 
 33 | 	dataset = argv[1]
 34 | 
 35 | 	if not os.path.exists(dataset):
 36 | 		print("dataset {0} not found".format(dataset))
 37 | 		exit(1)
 38 | 
 39 | 	line_no = 1
 40 | 	error_line_count = 0
 41 | 	for line in open(dataset, 'r'):
 42 | 		line_error = False
 43 | 
 44 | 		# each line must end with a newline character
 45 | 		if line[-1] != '\n':
 46 | 			err(line_no, "missing a newline character in the end")
 47 | 			line_error = True
 48 | 
 49 | 		nodes = line.split()
 50 | 
 51 | 		# check label
 52 | 		try:
 53 | 			label = nodes.pop(0)
 54 | 			
 55 | 			if label.find(',') != -1:
 56 | 				# multi-label format
 57 | 				try:
 58 | 					for l in label.split(','):
 59 | 						l = my_float(l)
 60 | 				except:
 61 | 					err(line_no, "label {0} is not a valid multi-label form".format(label))
 62 | 					line_error = True
 63 | 			else:
 64 | 				try:
 65 | 					label = my_float(label)
 66 | 				except:
 67 | 					err(line_no, "label {0} is not a number".format(label))
 68 | 					line_error = True
 69 | 		except:
 70 | 			err(line_no, "missing label, perhaps an empty line?")
 71 | 			line_error = True
 72 | 
 73 | 		# check features
 74 | 		prev_index = -1
 75 | 		for i in range(len(nodes)):
 76 | 			try:
 77 | 				(index, value) =  nodes[i].split(':')
 78 | 
 79 | 				index = int(index)
 80 | 				value = my_float(value)
 81 | 
 82 | 				# precomputed kernel's index starts from 0 and LIBSVM
 83 | 				# checks it. Hence, don't treat index 0 as an error.
 84 | 				if index < 0:
 85 | 					err(line_no, "feature index must be positive; wrong feature {0}".format(nodes[i]))
 86 | 					line_error = True
 87 | 				elif index < prev_index:
 88 | 					err(line_no, "feature indices must be in an ascending order, previous/current features {0} {1}".format(nodes[i-1], nodes[i]))
 89 | 					line_error = True
 90 | 				prev_index = index
 91 | 			except:
 92 | 				err(line_no, "feature '{0}' not an <index>:<value> pair, <index> integer, <value> real number ".format(nodes[i]))
 93 | 				line_error = True
 94 | 
 95 | 		line_no += 1
 96 | 
 97 | 		if line_error:
 98 | 			error_line_count += 1
 99 | 	
100 | 	if error_line_count > 0:
101 | 		print("Found {0} lines with error.".format(error_line_count))
102 | 		return 1
103 | 	else:
104 | 		print("No error.")
105 | 		return 0
106 | 
# When run as a script, the return value of main() becomes the exit status
107 | if __name__ == "__main__":
108 | 	exit(main())
109 | 


--------------------------------------------------------------------------------
/king/TraitTransformations.cpp:
--------------------------------------------------------------------------------
  1 | #include "TraitTransformations.h"
  2 | #include "QuickIndex.h"
  3 | #include "MathStats.h"
  4 | 
  5 | void InverseNormalTransform(Pedigree & ped)
  6 |    {
  7 |    Vector     phenotypes;
  8 |    IntArray   individuals;
  9 |    QuickIndex index;
 10 | 
 11 |    phenotypes.Dimension(ped.count);
 12 |    individuals.Dimension(ped.count);
 13 | 
 14 |    for (int trait = 0; trait < ped.traitCount; trait++)
 15 |       {
 16 |       phenotypes.Dimension(0);
 17 |       individuals.Dimension(0);
 18 | 
 19 |       for (int i = 0; i < ped.count; i++)
 20 |          if (ped[i].traits[trait] != _NAN_)
 21 |             {
 22 |             phenotypes.Push(ped[i].traits[trait]);
 23 |             individuals.Push(i);
 24 |             }
 25 | 
 26 |       int count = individuals.Length();
 27 | 
 28 |       if (count == 0) continue;
 29 | 
 30 |       index.Index(phenotypes);
 31 | 
 32 |       double scale = 1.0 / count;
 33 | 
 34 |       for (int i = 0, j; i < index.Length(); i++)
 35 |          {
 36 |          for (j = i; j + 1 < index.Length(); j++)
 37 |             if (ped[individuals[index[i]]].traits[trait] !=
 38 |                 ped[individuals[index[j]]].traits[trait] )
 39 |                 break;
 40 | 
 41 |          if (ped[individuals[index[i]]].traits[trait] !=
 42 |              ped[individuals[index[j]]].traits[trait] )
 43 |              j--;
 44 | 
 45 |          double z = ninv(((i + j) * 0.5 + 0.5) * scale);
 46 | 
 47 |          for (int k = i; k <= j; k++)
 48 |             ped[individuals[index[k]]].traits[trait] = z;
 49 | 
 50 |          i = j;
 51 |          }
 52 |       }
 53 |    }
 54 | 
 55 | void InverseNormalTransform(Pedigree & ped, int trait)
 56 |    {
 57 |    Vector     phenotypes;
 58 |    IntArray   individuals;
 59 |    QuickIndex index;
 60 | 
 61 |    phenotypes.Dimension(ped.count);
 62 |    phenotypes.Dimension(0);
 63 | 
 64 |    individuals.Dimension(ped.count);
 65 |    individuals.Dimension(0);
 66 | 
 67 |    for (int i = 0; i < ped.count; i++)
 68 |       if (ped[i].traits[trait] != _NAN_)
 69 |          {
 70 |          phenotypes.Push(ped[i].traits[trait]);
 71 |          individuals.Push(i);
 72 |          }
 73 | 
 74 |    int count = individuals.Length();
 75 | 
 76 |    if (count == 0) return;
 77 | 
 78 |    index.Index(phenotypes);
 79 | 
 80 |    double scale = 1.0 / count;
 81 | 
 82 |    for (int i = 0, j; i < index.Length(); i++)
 83 |       {
 84 |       for (j = i; j + 1 < index.Length(); j++)
 85 |          if (ped[individuals[index[i]]].traits[trait] !=
 86 |              ped[individuals[index[j]]].traits[trait] )
 87 |              break;
 88 | 
 89 |       if (ped[individuals[index[i]]].traits[trait] !=
 90 |           ped[individuals[index[j]]].traits[trait] )
 91 |           j--;
 92 | 
 93 |       double z = ninv(((i + j) * 0.5 + 0.5) * scale);
 94 | 
 95 |       for (int k = i; k <= j; k++)
 96 |          ped[individuals[index[k]]].traits[trait] = z;
 97 | 
 98 |       i = j;
 99 |       }
100 |    }
101 | 
102 | 
103 | 
104 | 


--------------------------------------------------------------------------------
/king/LongArray.cpp:
--------------------------------------------------------------------------------
  1 | #include "LongArray.h"
  2 | #include "Hash.h"
  3 | #include "Sort.h"
  4 | 
  5 | #include <string.h>
  6 | 
  7 | int LongArray::alloc = 4;
  8 | 
  9 | LongArray::LongArray(int start_size)
 10 |    {
 11 |    count = start_size;
 12 |    size = (count + alloc) / alloc * alloc;
 13 |    items = new longint [size];
 14 |    }
 15 | 
 16 | LongArray::LongArray(LongArray & source)
 17 |    {
 18 |    count = source.count;
 19 |    size = source.size;
 20 |    items = new longint [size];
 21 | 
 22 |    for (int i = 0; i < count; i++)
 23 |       items[i] = source.items[i];
 24 |    }
 25 | 
 26 | LongArray::~LongArray()
 27 |    {
 28 |    delete [] items;
 29 |    }
 30 | 
 31 | void LongArray::Grow(int new_size)
 32 |    {
 33 |    if (new_size > size)
 34 |       {
 35 |       if ((new_size >> 1) >= size)
 36 |          size = (new_size + alloc) / alloc * alloc;
 37 |       else
 38 |          {
 39 |          size = alloc;
 40 |          while (size <= new_size)
 41 |             size *= 2;
 42 |          }
 43 | 
 44 |       longint * new_items = new longint [size];
 45 |       for (int i = 0; i < count; i++)
 46 |          new_items[i] = items[i];
 47 |       delete [] items;
 48 |       items = new_items;
 49 |       }
 50 |    }
 51 | 
 52 | int LongArray::Append(longint value)
 53 |    {
 54 |    Grow(count + 1);
 55 |    items[count++] = value;
 56 |    return count;
 57 |    }
 58 | 
 59 | void LongArray::Set(longint value)
 60 |    {
 61 |    for (int i = 0; i < count; i++)
 62 |       items[i] = value;
 63 |    }
 64 | 
 65 | int LongArray::Delete(int index)
 66 |    {
 67 |    count--;
 68 |    if (count - index)
 69 |       memmove(items + index, items + index + 1, sizeof(longint) * (count - index));
 70 |    return count;
 71 |    }
 72 | 
 73 | void LongArray::InsertAt(int index, longint value)
 74 |    {
 75 |    Grow(count + 1);
 76 |    memmove(items + index + 1, items + index, sizeof(longint) * (count - index));
 77 |    items[index] = value;
 78 |    count++;
 79 |    }
 80 | 
 81 | LongArray & LongArray::operator = (const LongArray & rhs)
 82 |    {
 83 |    Grow(rhs.count);
 84 |    count = rhs.count;
 85 |    for (int i = 0; i < count; i++)
 86 |       items[i] = rhs.items[i];
 87 |    return *this;
 88 |    }
 89 | 
 90 | int LongArray::Find(longint value) const
 91 |    {
 92 |    for (int i = 0; i < count; i++)
 93 |       if (value == items[i])
 94 |          return i;
 95 |    return -1;
 96 |    }
 97 | 
 98 | void LongArray::Zero()
 99 |    {
100 |    for (int i = 0; i < count; i++)
101 |       items[i] = 0;
102 |    }
103 | 
104 | void LongArray::Reverse()
105 |    {
106 |    for (int i = 0, j = count - 1; i < j; i++, j--)
107 |       Swap(i, j);
108 |    }
109 | 
110 | bool LongArray::operator == (const LongArray & rhs) const
111 |    {
112 |    if (count != rhs.count)
113 |       return false;
114 | 
115 |    for (int i = 0; i < rhs.count; i++)
116 |       if (items[i] != rhs.items[i])
117 |          return false;
118 | 
119 |    return true;
120 |    }
121 | 
122 | bool LongArray::operator != (const LongArray & rhs) const
123 |    {
124 |    return !(*this == rhs);
125 |    }
126 | 
127 | int LongArray::Hash(int initval)
128 |    {
129 |    return hash((unsigned char *) items, sizeof(longint) * count, initval);
130 |    }
131 | 


--------------------------------------------------------------------------------
/king/FortranFormat.h:
--------------------------------------------------------------------------------
  1 | ////////////////////////////////////////////////////////////////////// 
  2 | // libsrc/FortranFormat.h 
  3 | // (c) 2000-2007 Goncalo Abecasis
  4 | // 
  5 | // This file is distributed as part of the MERLIN source code package   
  6 | // and may not be redistributed in any form, without prior written    
  7 | // permission from the author. Permission is granted for you to       
  8 | // modify this file for your own personal use, but modified versions  
  9 | // must retain this copyright notice and must not be distributed.     
 10 | // 
 11 | // Permission is granted for you to use this file to compile MERLIN.    
 12 | // 
 13 | // All computer programs have bugs. Use this file at your own risk.   
 14 | // 
 15 | // Tuesday December 18, 2007
 16 | // 
 17 |  
 18 | #ifndef __FORTRAN_FORMAT__
 19 | #define __FORTRAN_FORMAT__
 20 | 
 21 | #include "StringBasics.h"
 22 | #include "IntArray.h"
 23 | 
 // Reader that splits the lines of an input file into fields as directed by
 // a Fortran format statement. All member functions are defined outside this
 // header.
 24 | class FortranFormat
 25 |    {
 26 |    public:
 27 |       // This class reads a user specified input file, one line at a time,
 28 |       // and returns individual fields according to a user specified format
 29 |       // statement
 30 |       FortranFormat();
 31 | 
 32 |       // Set the Fortran format statement
 33 |       void SetFormat(const String & formatString);
 34 | 
 35 |       // Set the input file
 36 |       void SetInputFile(IFILE & file);
 37 | 
 38 |       // Read one field from the input file
 39 |       void GetNextField(String & field);
 40 |       int  GetNextInteger();
 41 |       char GetNextCharacter();
 42 | 
 43 |       // Process a token in the format statement and return true
 44 |       // if the token corresponds to an input field. Return false if
 45 |       // the token led to processing of white-space or input line
 46 |       // positioning
 47 |       bool ProcessToken(String & field);
 48 | 
 49 |       // Flush the pattern -- this finishes processing the current
 50 |       // pattern and ensures that all trailing new-lines, etc. are
 51 |       // handled correctly
 52 |       void Flush();
 53 | 
 54 |    private:
 55 |       // The input line and current position along it
 56 |       String inputLine;
 57 |       int inputPos;
 58 | 
 59 |       // The Fortran format statement and current position along it
 60 |       String format;
 61 |       int formatPos;
 62 | 
 63 |       // NOTE(review): originally described as "the position of the pattern
 63 |       // we are repeating", but the name suggests a repeat count -- confirm
 64 |       int repeatCount;
 65 | 
 66 |       // Returns an integer from the current format statement, if any
 67 |       int GetIntegerFromFormat();
 68 | 
 69 |       // These functions check the next character in the format string
 70 |       bool DigitFollows();
 71 |       bool CharacterFollows();
 72 | 
 73 |       // This function finishes the input field
 74 |       void FinishField(bool haveSlash = false);
 75 | 
 76 |       // Reject width where appropriate
 77 |       void RejectWidth(char type);
 78 | 
 79 |       // The input file
 80 |       IFILE input;
 81 | 
 82 |       // Stacks to keep track of nested parentheses
 83 |       IntArray bracketStack;
 84 |       IntArray bracketCount;
 85 |       IntArray bracketCounter;
 86 | 
 87 |       int lastBracket;
 88 |       int lastCount;
 89 | 
 90 |       // Buffer for reading fields
 91 |       String buffer;
 92 | 
 93 |       // Flag that indicates whether we have reached end-of-pattern
 94 |       bool   endOfPattern;
 95 |    };
 96 | 
 97 | #endif
 98 | 
 99 | 
100 |  
101 | 


--------------------------------------------------------------------------------
/king/PedigreePerson.h:
--------------------------------------------------------------------------------
  1 | #ifndef __PEDPERSON_H__
  2 | #define __PEDPERSON_H__
  3 | 
  4 | #include "Constant.h"
  5 | #include "PedigreeAlleles.h"
  6 | #include "PedigreeGlobals.h"
  7 | #include "StringArray.h"
  8 | #include "IntArray.h"
  9 | 
 10 | #define  SEX_MALE       1
 11 | #define  SEX_FEMALE     2
 12 | #define  SEX_UNKNOWN    0
 13 | 
 14 | class Person : public PedigreeGlobals
 15 |    {
 16 |    public:
 17 |       String      famid;
 18 |       String      pid;
 19 |       String      motid;
 20 |       String      fatid;
 21 |       int         sex;
 22 |       int         zygosity;
 23 |       int         serial, traverse;
 24 | 
 25 |       Alleles *   markers;
 26 | //      double *    traits;
 27 |       Vector traits;
 28 |       char *      affections;
 29 | //      double *    covariates;
 30 |       Vector covariates;
 31 | 
 32 |       Person *    father;
 33 |       Person *    mother;
 34 | 
 35 |       int         sibCount;
 36 |       Person **   sibs;
 37 | 
 38 |       int         ngeno;
 39 | 
 40 |       bool        filter;
 41 | 
 42 |       Person();
 43 |       ~Person();
 44 | 
 45 |       bool isHalfSib(Person & sib)
 46 |          {
 47 |          return hasBothParents &&
 48 |             ((sib.father == father) ^ (sib.mother == mother));
 49 |          }
 50 | 
 51 |       bool isSib(Person & sib)
 52 |          {
 53 |          return hasBothParents &&
 54 |                 (sib.father == father) && (sib.mother == mother);
 55 |          }
 56 | 
 57 |       bool isTwin(Person & twin)
 58 |          {
 59 |          return (zygosity != 0) && (zygosity == twin.zygosity) && isSib(twin);
 60 |          }
 61 | 
 62 |       bool isMzTwin(Person & mzTwin)
 63 |          {
 64 |          return (zygosity & 1) && (zygosity == mzTwin.zygosity) && isSib(mzTwin);
 65 |          }
 66 | 
 67 |       // Check that both parents or none are available
 68 |       // Verify that fathers are male and mothers are female
 69 |       bool CheckParents();
 70 | 
 71 |       // Assess status before using quick diagnostics functions
 72 |       void AssessStatus();
 73 | 
 74 |       // Quick diagnostics
 75 |       bool isFounder()
 76 |          { return !hasBothParents; }
 77 |       bool isSexed()
 78 |          { return sex != 0; }
 79 |       bool isGenotyped(int m)
 80 |          { return markers[m].isKnown(); }
 81 |       bool isFullyGenotyped()
 82 |          { return ngeno == markerCount; }
 83 |       bool isControlled(int c)
 84 |          { return covariates[c] != _NAN_; }
 85 |       bool isFullyControlled()
 86 |          { return hasAllCovariates; }
 87 |       bool isPhenotyped(int t)
 88 |          { return traits[t] != _NAN_; }
 89 |       bool isFullyPhenotyped()
 90 |          { return hasAllTraits; }
 91 |       bool isDiagnosed(int a)
 92 |          { return affections[a] != 0; }
 93 |       bool isFullyDiagnosed()
 94 |          { return hasAllAffections; }
 95 |       bool haveData();
 96 |       bool isAncestor(Person * descendant);
 97 | 
 98 |       int GenotypedMarkers();
 99 | 
100 |       static void Order(Person * & p1, Person * & p2);
101 | 
102 |       void Copy(Person & rhs);
103 |       void CopyIDs(Person & rhs);
104 |       void CopyPhenotypes(Person & rhs);
105 |       void WipePhenotypes(bool remove_genotypes = true);
106 | 
107 |    private:
108 | 
109 |       bool hasAllCovariates, hasAllTraits,
110 |            hasAllAffections, hasBothParents;
111 |    };
112 | 
113 | #endif
114 | 
115 | 
116 | 
117 | 
118 | 


--------------------------------------------------------------------------------
/singularity.def:
--------------------------------------------------------------------------------
  1 | Bootstrap: library
  2 | From: ubuntu:16.04
  3 | 
  4 | %files
  5 |   .git /topmed_variant_calling/.git
  6 |   .gitmodules /topmed_variant_calling/.gitmodules
  7 |   apigenome /topmed_variant_calling/apigenome
  8 |   bamUtil /topmed_variant_calling/bamUtil
  9 |   bcftools /topmed_variant_calling/bcftools
 10 |   cramore /topmed_variant_calling/cramore
 11 |   htslib /topmed_variant_calling/htslib
 12 |   invNorm /topmed_variant_calling/invNorm
 13 |   king /topmed_variant_calling/king
 14 |   libStatGen /topmed_variant_calling/libStatGen
 15 |   libsvm /topmed_variant_calling/libsvm
 16 |   samtools /topmed_variant_calling/samtools
 17 |   scripts /topmed_variant_calling/scripts
 18 |   vt-topmed /topmed_variant_calling/vt-topmed
 19 | 
 20 | %environment
 21 |   export LC_ALL=C
 22 | 
 23 | %post
 24 |   set -eu
 25 | 
 26 |   apt-get update && apt-get install -y \
 27 |     apt-utils \
 28 |     automake \
 29 |     autoconf \
 30 |     build-essential \
 31 |     cmake \
 32 |     default-jre \
 33 |     default-jdk \
 34 |     gdb \
 35 |     git \
 36 |     ghostscript \
 37 |     gnuplot \
 38 |     groff \
 39 |     libcurl4-gnutls-dev \
 40 |     liblzma-dev \
 41 |     libncurses5-dev \
 42 |     libssl-dev \
 43 |     libzstd-dev \
 44 |     python3 \
 45 |     r-base \
 46 |     wget \
 47 |     zlib1g-dev
 48 | 
 49 |   cd /topmed_variant_calling
 50 | 
 51 |   git submodule init
 52 |   git submodule update
 53 | 
 54 |   git clone https://github.com/samtools/htslib htslib-1.13
 55 |   cd htslib-1.13
 56 |   git checkout 1.13
 57 |   git submodule update --init --recursive
 58 |   autoreconf -i
 59 |   ./configure --disable-libcurl
 60 |   make
 61 |   cd ..
 62 | 
 63 |   mkdir plink
 64 |   cd plink/
 65 |   wget http://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20190617.zip
 66 |   unzip plink_linux_x86_64_20190617.zip
 67 |   rm plink_linux_x86_64_20190617.zip
 68 |   install -T plink /usr/local/bin/plink-1.9
 69 |   cd ..
 70 | 
 71 |   wget https://sourceforge.net/projects/snpeff/files/snpEff_v4_3t_core.zip/download
 72 |   unzip download
 73 |   rm download
 74 |   rm -r clinEff/
 75 | 
 76 |   cd libsvm/
 77 |   make clean
 78 |   make
 79 |   cd ..
 80 | 
 81 |   cd apigenome
 82 |   git clean -fdx
 83 |   autoreconf -vfi
 84 |   ./configure --prefix $PWD
 85 |   make
 86 |   make install
 87 |   cd ..
 88 |  
 89 |   cd libStatGen
 90 |   git clean -fdx
 91 |   make
 92 |   cd ..
 93 |  
 94 |   cd bamUtil
 95 |   git clean -fdx
 96 |   make
 97 |   cd ..
 98 |   
 99 |   cd invNorm
100 |   git clean -fdx
101 |   make
102 |   cd ..
103 | 
104 |   cd htslib
105 |   git clean -fdx
106 |   autoheader
107 |   autoconf
108 |   ./configure --disable-libcurl
109 |   make
110 |   install bgzip /usr/local/bin
111 |   install tabix /usr/local/bin
112 |   cd ..
113 | 
114 |   cd vt-topmed
115 |   git clean -fdx
116 |   make
117 |   cd ..
118 |   
119 |   cd cramore
120 |   git clean -fdx
121 |   mkdir build
122 |   cd build
123 |   cmake -DCMAKE_BUILD_TYPE=Release -DHTS_INCLUDE_DIRS=/topmed_variant_calling/htslib-1.13  -DHTS_LIBRARIES=/topmed_variant_calling/htslib-1.13/libhts.a ..
124 |   make
125 |   cd ../..
126 |   
127 |   cd samtools
128 |   git clean -fdx
129 |   autoheader
130 |   autoconf -Wno-syntax
131 |   ./configure
132 |   make
133 |   make install
134 |   cd ..
135 |   
136 |   cd bcftools
137 |   git clean -fdx
138 |   make
139 |   make install
140 |   cd ..
141 | 
142 |   cd king
143 |   rm -f king *.o
144 |   g++ -O3 -fopenmp -o king *.cpp -lm -lz
145 |   cd ..
146 | 
147 | 


--------------------------------------------------------------------------------
/scripts/e05-whitelist-gwas-variants.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | use strict;
 4 | 
 5 | my $chr = $ARGV[0];
 6 | 
 7 | my %hw = ();
 8 | my $nw = 0;
 9 | open(IN,"resources/gwascatalog.20200204.uniq.rsid.entries.tsv") || die "Cannot open file\n";
10 | while(<IN>) {
11 |     my ($build,$rsno,@F) = split;
12 |     next unless ( $F[0] eq "$chr" );
13 |     my @alts = split(/,/,$F[4]);
14 |     next unless ( $F[7] =~ /;CAF=/ );
15 |     my @cafs = split(/,/,$1) if ( $F[7] =~ /;CAF=([^;]+);/ );
16 |     my ($maxaf,$imax) = (0,0);
17 |     for(my $i=1; $i < @cafs; ++$i) {
18 | 	if ( ( $cafs[$i] ne "." ) && ( $cafs[$i] > $maxaf ) ) {
19 | 	    $imax = $i;
20 | 	    $maxaf = $cafs[$i];
21 | 	}
22 |     }
23 |     next if ( $imax == 0 );
24 |     $hw{"$F[1]:$F[3]:$alts[$imax-1]"} = 1;
25 |     ++$nw;
26 | }
27 | close IN;
28 | 
29 | print STDERR "Finished loading $nw variants to be whitelisted\n";
30 | 
31 | if ($nw == 0) {
32 |   die("Error: no variants to be whitelisted\n");
33 | }
34 | 
35 | my $vcf = $ARGV[1]; #"release/sites/nowhite/freeze9.merged.chr$chr.filtered.anno.sites.vcf.gz";
36 | my $outprefix  = $ARGV[2]; #"release/sites/freeze9.merged.chr$chr.filtered.anno.gwas.sites";
37 | my $vcfsummary2 = "$ENV{'EXE_PREFIX'}/apigenome/bin/vcf-summary-v2";
38 | my $ref = "resources/ref/hs38DH.fa";
39 | my $dbsnp = "resources/ref/dbsnp_142.b38.vcf.gz";
40 | my @posVcfs = qw(resources/ref/hapmap_3.3.b38.sites.vcf.gz resources/ref/1000G_omni2.5.b38.sites.PASS.vcf.gz);
41 |     
42 | open(VCF,"zcat $vcf |") || die "Cannot open file\n";
43 | open(OUT1," | bgzip -c > $outprefix.vcf.gz") || die "Cannot open file\n";
44 | open(OUT2, "| $vcfsummary2 --ref $ref --db $dbsnp --FNRvcf $posVcfs[0] --chr $chr --tabix tabix --bgzip bgzip > $outprefix.summary_v2") || die "Cannot open file\n";
45 | 
46 | my ($ngwasPass,$ngwasKeep,$ngwasSwitch) = (0,0,0);
47 | while(<VCF>) {
48 |     if ( /^#/ ) {
49 | 	next if ( /^INFO=<ID=GC/ );
50 | 	next if ( /^INFO=<ID=GN/ );	
51 | 	next if ( /^INFO=<ID=AF/ );	
52 | 	next if ( /^INFO=<ID=HWEAF_P/ );
53 | 	next if ( /^INFO=<ID=AVG_IF/ );
54 | 	print OUT1 $_;
55 | 	if ( /^##INFO=<ID=NM1,/ ) {
56 | 	    print OUT1 "##INFO=<ID=GWAS,Number=.,Type=String,Description=\"Whitelisted GWAS catalog variant. Value is the original FILTER column, separated by comma instead of semicolon\">\n";
57 | 	}
58 |     }
59 |     else {
60 | 	my @F = split(/[\t\r\n]/);
61 | 	$F[7] =~ s/;AF=.*;HWEAF_P=[^;]+;/;/;
62 | 	$F[7] =~ s/;AVG_IF=[^;]+;/;/;
63 | 	my $key = "$F[1]:$F[3]:$F[4]";
64 | 
65 | 	if ( defined($hw{$key}) ) {
66 | 	    my $oldFilt = $F[6];
67 | 	    $oldFilt =~ s/;/,/g;  ## oldFilt contains the old filters
68 | 	    my $newFilt = "PASS"; ## newFilt should be PASS in most cases
69 | 	    if ( $oldFilt eq "PASS" ) { ++$ngwasPass; } ## if already PASS, that is fine still pass
70 | 	    elsif ( ( $oldFilt =~ /SVM/ ) || ( $oldFilt =~ /CEN/ ) || ( $oldFilt =~ /EXHET/ ) || ( $oldFilt =~ /DISC/ ) || ( $oldFilt =~ /CHRXHET/ ) ) {  ## if failed by existing filters
71 | 		++$ngwasKeep; ## keep the current filter
72 | 		$newFilt = $F[6];
73 | 	    }
74 | 	    else { ## must be only MIS2,DUP2,TRI2
75 | 		++$ngwasSwitch;
76 | 	    }
77 | 	    $F[7] .= ";GWAS=$oldFilt";
78 | 	    $F[6] = $newFilt; #"PASS";
79 | 	    print STDERR "$F[0]:$F[1]:$F[3]:$F[4] $ngwasPass $ngwasKeep $ngwasSwitch\n" if ( rand() < 0.01 );
80 | 	}
81 | 	print OUT1 join("\t",@F)."\n";
82 | 	print OUT2 join("\t",@F)."\n";
83 |     }
84 | }
85 | close OUT1;
86 | close OUT2;
87 | close VCF;
88 | 
89 | print STDERR "Finished $ngwasPass $ngwasKeep $ngwasSwitch\n";
90 | 
91 | print `tabix -f -pvcf $outprefix.vcf.gz`;
92 | 


--------------------------------------------------------------------------------
/king/MiniDeflate.h:
--------------------------------------------------------------------------------
 1 | #ifndef __MINIDEFLATE_H__
 2 | #define __MINIDEFLATE_H__
 3 | 
 4 | #include <stdio.h>
 5 | 
 6 | // MiniDeflate reads and writes files in a simple Deflate like format
 7 | // A quick overview of this format follows, at the bottom of this file
 8 | //
 9 | 
10 | // Performance tuning constants
11 | //
12 | 
13 | // Hash table size is HASH_SIZE (a prime)
14 | #define HASH_SIZE    4093
15 | // Hash table depth is HASH_DEPTH (a power of 2)
16 | #define HASH_DEPTH   8
17 | // Matches that are not at least OKAY_MATCH chars are added to hash table
18 | #define OKAY_MATCH   32
19 | // Buffer size for FILE I/O
20 | #define BUFFER_SIZE  (32 * 1024)
21 | 
// Reads and writes files in a simple Deflate-like format
class MiniDeflate
   {
   public:
      MiniDeflate();
      ~MiniDeflate();

      // Going by the parameter names: Deflate takes a memory block of
      // the given size and an output file, Inflate takes an input file
      // and a memory block of the given size
      void Deflate(FILE * output, void * input, size_t bytes);
      void Inflate(FILE * input, void * ouput, size_t bytes);

   private:
      // Working storage
      unsigned char *  buffer;
      unsigned char *  hash_keys;
      unsigned char ** hash_values;

      // Inline helpers used during file compression
      inline void EvaluateMatch(unsigned char * in,
                                int len,
                                int hash,
                                unsigned char * & best_pos,
                                int & best_match);

      inline void QuoteLiterals(unsigned char * & in,
                                int literal,
                                unsigned char * & out,
                                int & buffer_len,
                                FILE * output);

      inline void OutputLiterals(unsigned char * & in,
                                 int literal,
                                 unsigned char * & out,
                                 int & buffer_len,
                                 FILE * output);

      inline void CiteLiteral(unsigned char * & out,
                              int literal,
                              unsigned char * & in,
                              int & buffer_len,
                              FILE * input);
   };
49 | 
50 | // Format specification for deflate files
51 | //
52 | // A compressed file is a sequence of bytes {0 .. N}.
53 | // Each byte is a sequence of bits [0 .. 7] with 0 as the Most Significant Bit.
54 | //
55 | // The following tokens are recognized:
56 | //
57 | // Literal quotes -- refer to unique strings
58 | //
59 | //   BYTE0    BYTE1     BYTE2       Description
60 | //     0       HI        LO         Quote of 31 bytes of more
61 | //                                  Followed by (HI << 8 + LO + 31) quoted chars
62 | //    0:4|LEN                       Quote of up to 1-15 bytes
63 | //                                  Followed by LEN quoted chars
64 | //
65 | // String matches -- refer to previous strings in the input stream
66 | //
67 | //   BYTE0    BYTE1     BYTE2     BYTE3   BYTE4     Description
68 | //  1:4|OFF   OFF1     OFF2:2|0    HI      LO       Long match of > 66 bytes
69 | //                                                  Offset of OFF|OFF1|OFF2 + 1
70 | //                                                  Length of HI|LO + 66
71 | //  1:4|OFF   OFF1     OFF2:2|LEN                   Distant match of < 66 bytes
72 | //                                                  Offset of OFF|OFF1|OFF2 + 1
73 | //                                                  Length of LEN + 2
74 | //  LEN|OFF   OFF1                                  Nearby short match
75 | //                                                  Offset OFF|OFF1 + 1
76 | //                                                  Length LEN
77 | //
78 | 
79 | // NOTE: When partitioning bytes, I use the notation X:n|Y so that
80 | // X takes the n MSB bits of byte and Y takes the remaining bits.
81 | 
82 | 
83 | #endif
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/libsvm/svm.h:
--------------------------------------------------------------------------------
  1 | #ifndef _LIBSVM_H
  2 | #define _LIBSVM_H
  3 | 
  4 | #define LIBSVM_VERSION 310
  5 | 
  6 | #ifdef __cplusplus
  7 | extern "C" {
  8 | #endif
  9 | 
 10 | extern int libsvm_version;
 11 | 
 12 | struct svm_node
 13 | {
 14 | 	int index;
 15 | 	double value;
 16 | };
 17 | 
 18 | struct svm_problem
 19 | {
 20 | 	int l;
 21 | 	double *y;
 22 | 	struct svm_node **x;
 23 | };
 24 | 
 25 | enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR };	/* svm_type */
 26 | enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
 27 | 
 28 | struct svm_parameter
 29 | {
 30 | 	int svm_type;
 31 | 	int kernel_type;
 32 | 	int degree;	/* for poly */
 33 | 	double gamma;	/* for poly/rbf/sigmoid */
 34 | 	double coef0;	/* for poly/sigmoid */
 35 | 
 36 | 	/* these are for training only */
 37 | 	double cache_size; /* in MB */
 38 | 	double eps;	/* stopping criteria */
 39 | 	double C;	/* for C_SVC, EPSILON_SVR and NU_SVR */
 40 | 	int nr_weight;		/* for C_SVC */
 41 | 	int *weight_label;	/* for C_SVC */
 42 | 	double* weight;		/* for C_SVC */
 43 | 	double nu;	/* for NU_SVC, ONE_CLASS, and NU_SVR */
 44 | 	double p;	/* for EPSILON_SVR */
 45 | 	int shrinking;	/* use the shrinking heuristics */
 46 | 	int probability; /* do probability estimates */
 47 | };
 48 | 
 49 | //
 50 | // svm_model
 51 | // 
 52 | struct svm_model
 53 | {
 54 | 	struct svm_parameter param;	/* parameter */
 55 | 	int nr_class;		/* number of classes, = 2 in regression/one class svm */
 56 | 	int l;			/* total #SV */
 57 | 	struct svm_node **SV;		/* SVs (SV[l]) */
 58 | 	double **sv_coef;	/* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
 59 | 	double *rho;		/* constants in decision functions (rho[k*(k-1)/2]) */
 60 | 	double *probA;		/* pariwise probability information */
 61 | 	double *probB;
 62 | 
 63 | 	/* for classification only */
 64 | 
 65 | 	int *label;		/* label of each class (label[k]) */
 66 | 	int *nSV;		/* number of SVs for each class (nSV[k]) */
 67 | 				/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
 68 | 	/* XXX */
 69 | 	int free_sv;		/* 1 if svm_model is created by svm_load_model*/
 70 | 				/* 0 if svm_model is created by svm_train */
 71 | };
 72 | 
 73 | struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
 74 | void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
 75 | 
 76 | int svm_save_model(const char *model_file_name, const struct svm_model *model);
 77 | struct svm_model *svm_load_model(const char *model_file_name);
 78 | 
 79 | int svm_get_svm_type(const struct svm_model *model);
 80 | int svm_get_nr_class(const struct svm_model *model);
 81 | void svm_get_labels(const struct svm_model *model, int *label);
 82 | double svm_get_svr_probability(const struct svm_model *model);
 83 | 
 84 | double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
 85 | double svm_predict(const struct svm_model *model, const struct svm_node *x);
 86 | double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
 87 | 
 88 | void svm_free_model_content(struct svm_model *model_ptr);
 89 | void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
 90 | void svm_destroy_param(struct svm_parameter *param);
 91 | 
 92 | const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
 93 | int svm_check_probability_model(const struct svm_model *model);
 94 | 
 95 | void svm_set_print_string_function(void (*print_func)(const char *));
 96 | 
 97 | #ifdef __cplusplus
 98 | }
 99 | #endif
100 | 
101 | #endif /* _LIBSVM_H */
102 | 


--------------------------------------------------------------------------------
/king/MemoryAllocators.cpp:
--------------------------------------------------------------------------------
#include "MemoryAllocators.h"

#include <new>
#include <stdlib.h>
  4 | 
  5 | char *** AllocateCharCube(int n, int rows, int cols)
  6 |    {
  7 |    char *** cube = new char ** [n];
  8 | 
  9 |    // Stop early if we are out of memory
 10 |    if (cube == NULL)
 11 |       return NULL;
 12 | 
 13 |    for (int i = 0; i < n; i++)
 14 |       {
 15 |       cube[i] = AllocateCharMatrix(rows, cols);
 16 | 
 17 |       // Safely unravel allocation if we run out of memory
 18 |       if (cube[i] == NULL)
 19 |          {
 20 |          while (i--)
 21 |             FreeCharMatrix(cube[i], rows);
 22 | 
 23 |          delete [] cube;
 24 | 
 25 |          return NULL;
 26 |          }
 27 |       }
 28 | 
 29 |    return cube;
 30 |    }
 31 | 
 32 | int ** AllocateIntMatrix(int rows, int cols)
 33 |    {
 34 |    int ** matrix = new int * [rows];
 35 | 
 36 |    // Stop early if we are out of memory
 37 |    if (matrix == NULL)
 38 |       return NULL;
 39 | 
 40 |    for (int i = 0; i < rows; i++)
 41 |       {
 42 |       matrix[i] = new int [cols];
 43 | 
 44 |       // Safely unravel allocation if we run out of memory
 45 |       if (matrix[i] == NULL)
 46 |          {
 47 |          while (i--)
 48 |             delete [] matrix[i];
 49 | 
 50 |          delete [] matrix;
 51 | 
 52 |          return NULL;
 53 |          }
 54 |       }
 55 | 
 56 |    return matrix;
 57 |    }
 58 | 
 59 | char ** AllocateCharMatrix(int rows, int cols)
 60 |    {
 61 |    char ** matrix = new char * [rows];
 62 | 
 63 |    // Stop early if we are out of memory
 64 |    if (matrix == NULL)
 65 |       return NULL;
 66 | 
 67 |    for (int i = 0; i < rows; i++)
 68 |       {
 69 |       matrix[i] = new char [cols];
 70 | 
 71 |       // Safely unravel allocation if we run out of memory
 72 |       if (matrix[i] == NULL)
 73 |          {
 74 |          while (i--)
 75 |             delete [] matrix[i];
 76 | 
 77 |          delete [] matrix;
 78 | 
 79 |          return NULL;
 80 |          }
 81 |       }
 82 | 
 83 |    return matrix;
 84 |    }
 85 | 
 86 | float ** AllocateFloatMatrix(int rows, int cols)
 87 |    {
 88 |    float ** matrix = new float * [rows];
 89 | 
 90 |    // Stop early if we are out of memory
 91 |    if (matrix == NULL)
 92 |       return NULL;
 93 | 
 94 |    for (int i = 0; i < rows; i++)
 95 |       {
 96 |       matrix[i] = new float [cols];
 97 | 
 98 |       // Safely unravel allocation if we run out of memory
 99 |       if (matrix[i] == NULL)
100 |          {
101 |          while (i--)
102 |             delete [] matrix[i];
103 | 
104 |          delete [] matrix;
105 | 
106 |          return NULL;
107 |          }
108 |       }
109 | 
110 |    return matrix;
111 |    }
112 | 
113 | void FreeCharCube(char *** & cube, int n, int rows)
114 |    {
115 |    for (int i = 0; i < n; i++)
116 |       FreeCharMatrix(cube[i], rows);
117 | 
118 |    delete [] cube;
119 | 
120 |    cube = NULL;
121 |    }
122 | 
// Releases every row of a character matrix, then the row pointer array
// itself, and resets the caller's pointer to NULL.
//
// A NULL matrix is accepted and left untouched, so the result of a
// failed allocation can be passed in safely.
void FreeCharMatrix(char ** & matrix, int rows)
   {
   // Nothing to release; indexing a NULL matrix below would crash
   if (matrix == NULL)
      return;

   for (int i = 0; i < rows; i++)
      delete [] matrix[i];

   delete [] matrix;

   matrix = NULL;
   }
132 | 
// Releases every row of a float matrix, then the row pointer array
// itself, and resets the caller's pointer to NULL.
//
// A NULL matrix is accepted and left untouched, so the result of a
// failed allocation can be passed in safely.
void FreeFloatMatrix(float ** & matrix, int rows)
   {
   // Nothing to release; indexing a NULL matrix below would crash
   if (matrix == NULL)
      return;

   for (int i = 0; i < rows; i++)
      delete [] matrix[i];

   delete [] matrix;

   matrix = NULL;
   }
142 | 
// Releases every row of an int matrix, then the row pointer array
// itself, and resets the caller's pointer to NULL.
//
// A NULL matrix is accepted and left untouched, so the result of a
// failed allocation can be passed in safely.
void FreeIntMatrix(int ** & matrix, int rows)
   {
   // Nothing to release; indexing a NULL matrix below would crash
   if (matrix == NULL)
      return;

   for (int i = 0; i < rows; i++)
      delete [] matrix[i];

   delete [] matrix;

   matrix = NULL;
   }
152 | 
153 | 
154 | 
155 | 


--------------------------------------------------------------------------------
/king/Input.cpp:
--------------------------------------------------------------------------------
  1 | #include "Input.h"
  2 | #include "Error.h"
  3 | #include "Constant.h"
  4 | 
  5 | #include <stdio.h>
  6 | #include <string.h>
  7 | 
  8 | int InputPromptWidth = 25;
  9 | 
 10 | void safe_gets(char * buffer, int n)
 11 |    {
 12 |    buffer[0] = 0;
 13 | 
 14 |    fgets(buffer, n, stdin);
 15 | 
 16 |    for (char * ptr = buffer; *ptr != 0; ptr++)
 17 |       if (*ptr == '\n') 
 18 |          *ptr = 0;
 19 |    }
 20 |       
 21 | void Input(const char * prompt, int & n, int _default)
 22 |    {
 23 |    char buffer[BUFSIZE];
 24 | 
 25 |    int success;
 26 |    do {
 27 |       printf("%*s [%8d]: ", InputPromptWidth, prompt, _default);
 28 |       safe_gets(buffer, BUFSIZE);
 29 |       success = sscanf(buffer, "%d", &n);
 30 |       if (success == EOF)
 31 |          n = _default;
 32 |    } while (success == 0);
 33 |    }
 34 | 
 35 | void Input(const char * prompt, char & ch, char _default)
 36 |    {
 37 |    char buffer[BUFSIZE];
 38 | 
 39 |    int success;
 40 |    do {
 41 |       printf("%*s [%8c]: ", InputPromptWidth, prompt, _default);
 42 |       safe_gets(buffer, BUFSIZE);
 43 |       success = sscanf(buffer, "%c", &ch);
 44 |       if (success == EOF)
 45 |          ch = _default;
 46 |    } while (success == 0);
 47 |    }
 48 | 
 49 | void Input(const char * prompt, double & d, double _default)
 50 |    {
 51 |    char buffer[BUFSIZE];
 52 | 
 53 |    int success;
 54 |    do {
 55 |       printf("%*s [%8.2f]: ", InputPromptWidth, prompt, _default);
 56 |       safe_gets(buffer, BUFSIZE);
 57 |       success = sscanf(buffer, "%lf", &d);
 58 |       if (success == EOF)
 59 |          d = _default;
 60 |    } while (success == 0);
 61 |    }
 62 | 
 63 | void Input(const char * prompt, bool & b, bool _default)
 64 |    {
 65 |    char buffer[BUFSIZE];
 66 |    int success;
 67 |    char c;
 68 | 
 69 |    do {
 70 |       printf("%*s [%8s]: ", InputPromptWidth, prompt, _default ? "Y/n" : "y/N");
 71 |       safe_gets(buffer, BUFSIZE);
 72 |       success = sscanf(buffer, "%c", &c);
 73 |       if (success == EOF)
 74 |          b = _default;
 75 |       else
 76 |          switch (c)
 77 |             {
 78 |             case 'y' :
 79 |             case 'Y' :
 80 |                b = true;
 81 |                break;
 82 |             case 'n' :
 83 |             case 'N' :
 84 |                b = false;
 85 |                break;
 86 |             default :
 87 |                success = 0;
 88 |             }
 89 |    } while (success == 0);
 90 |    }
 91 | 
 92 | 
 93 | void Input(const char * prompt, char * s, char * _default)
 94 |    {
 95 |    char buffer[BUFSIZE];
 96 | 
 97 |    int success;
 98 |    do {
 99 |       printf("%*s [%8s]: ", InputPromptWidth, prompt, _default);
100 |       safe_gets(buffer, BUFSIZE);
101 |       success = sscanf(buffer, " %[^\n]", s);
102 |       if (success == EOF)
103 |          strcpy(s, _default);
104 |    } while (success == 0);
105 |    }
106 | 
107 | void InputBounds(const char * prompt, int & n, int  min, int max,
108 |                  int _default)
109 |    {
110 |    Input(prompt, n, _default);
111 |    while ((n < min) || (n > max))
112 |       {
113 |       printf("\n*** Input value must be between %d and %d ***\n", min, max);
114 |       Input(prompt, n, _default);
115 |       }
116 |    }
117 | 
118 | void InputBounds(const char * prompt, double & d, double min, double max,
119 |                  double _default)
120 |    {
121 |    Input(prompt, d, _default);
122 |    while ((d < min) || (d > max))
123 |       {
124 |       printf("\n*** Input value must be between %.2f and %.2f ***\n", min, max);
125 |       Input(prompt, d, _default);
126 |       }
127 |    }
128 | 
129 | 
130 | 


--------------------------------------------------------------------------------
/libsvm/tools/subset.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | from sys import argv, exit, stdout, stderr
  3 | from random import randint
  4 | 
  5 | method = 0
  6 | global n
  7 | global dataset_filename
  8 | subset_filename = ""
  9 | rest_filename = ""
 10 | 
 11 | def exit_with_help():
 12 | 	print("""\
 13 | Usage: {0} [options] dataset number [output1] [output2]
 14 | 
 15 | This script selects a subset of the given dataset.
 16 | 
 17 | options:
 18 | -s method : method of selection (default 0)
 19 |      0 -- stratified selection (classification only)
 20 |      1 -- random selection
 21 | 
 22 | output1 : the subset (optional)
 23 | output2 : rest of the data (optional)
 24 | If output1 is omitted, the subset will be printed on the screen.""".format(argv[0]))
 25 | 	exit(1)
 26 | 
 27 | def process_options():
 28 | 	global method, n
 29 | 	global dataset_filename, subset_filename, rest_filename
 30 | 	
 31 | 	argc = len(argv)
 32 | 	if argc < 3:
 33 | 		exit_with_help()
 34 | 
 35 | 	i = 1
 36 | 	while i < len(argv):
 37 | 		if argv[i][0] != "-":
 38 | 			break
 39 | 		if argv[i] == "-s":
 40 | 			i = i + 1
 41 | 			method = int(argv[i])
 42 | 			if method < 0 or method > 1:
 43 | 				print("Unknown selection method {0}".format(method))
 44 | 				exit_with_help()
 45 | 		i = i + 1
 46 | 
 47 | 	dataset_filename = argv[i]
 48 | 	n = int(argv[i+1])
 49 | 	if i+2 < argc:
 50 | 		subset_filename = argv[i+2]
 51 | 	if i+3 < argc:
 52 | 		rest_filename = argv[i+3]
 53 | 
 54 | def main():
 55 | 	class Label:
 56 | 		def __init__(self, label, index, selected):
 57 | 			self.label = label
 58 | 			self.index = index
 59 | 			self.selected = selected
 60 | 
 61 | 	process_options()
 62 | 	
 63 | 	# get labels
 64 | 	i = 0
 65 | 	labels = []
 66 | 	f = open(dataset_filename, 'r')
 67 | 	for line in f:
 68 | 		labels.append(Label(float((line.split())[0]), i, 0))
 69 | 		i = i + 1
 70 | 	f.close()
 71 | 	l = i
 72 | 	
 73 | 	# determine where to output
 74 | 	if subset_filename != "":
 75 | 		file1 = open(subset_filename, 'w')
 76 | 	else:
 77 | 		file1 = stdout
 78 | 	split = 0
 79 | 	if rest_filename != "":
 80 | 		split = 1	
 81 | 		file2 = open(rest_filename, 'w')
 82 | 	
 83 | 	# select the subset
 84 | 	warning = 0
 85 | 	if method == 0: # stratified
 86 | 		labels.sort(key = lambda x: x.label)
 87 | 		
 88 | 		label_end = labels[l-1].label + 1
 89 | 		labels.append(Label(label_end, l, 0))
 90 | 
 91 | 		begin = 0
 92 | 		label = labels[begin].label
 93 | 		for i in range(l+1):
 94 | 			new_label = labels[i].label
 95 | 			if new_label != label:
 96 | 				nr_class = i - begin
 97 | 				k = i*n//l - begin*n//l
 98 | 				# at least one instance per class
 99 | 				if k == 0:
100 | 					k = 1
101 | 					warning = warning + 1
102 | 				for j in range(nr_class):
103 | 					if randint(0, nr_class-j-1) < k:
104 | 						labels[begin+j].selected = 1
105 | 						k = k - 1
106 | 				begin = i
107 | 				label = new_label
108 | 	elif method == 1: # random
109 | 		k = n
110 | 		for i in range(l):
111 | 			if randint(0,l-i-1) < k:
112 | 				labels[i].selected = 1
113 | 				k = k - 1
114 | 			i = i + 1
115 | 
116 | 	# output
117 | 	i = 0
118 | 	if method == 0:
119 | 		labels.sort(key = lambda x: int(x.index))
120 | 	
121 | 	f = open(dataset_filename, 'r')
122 | 	for line in f:
123 | 		if labels[i].selected == 1:
124 | 			file1.write(line)
125 | 		else:
126 | 			if split == 1:
127 | 				file2.write(line)
128 | 		i = i + 1
129 | 
130 | 	if warning > 0:
131 | 		stderr.write("""\
132 | Warning:
133 | 1. You may have regression data. Please use -s 1.
134 | 2. Classification data unbalanced or too small. We select at least 1 per class.
135 |    The subset thus contains {0} instances.
136 | """.format(n+warning))
137 | 
138 | 	# cleanup
139 | 	f.close()
140 | 	
141 | 	file1.close()
142 | 	
143 | 	if split == 1:
144 | 		file2.close()
145 | 
146 | main()
147 | 


--------------------------------------------------------------------------------
/king/MathCholesky.cpp:
--------------------------------------------------------------------------------
  1 | #include "MathCholesky.h"
  2 | #include "Error.h"
  3 | 
  4 | #include <math.h>
  5 | 
  6 | void Cholesky::Decompose(Matrix & A)
  7 |    {
  8 |    L.Dimension(A.rows, A.rows);
  9 |    L.Zero();
 10 |    FastDecompose(A);
 11 |    }
 12 | 
 13 | void Cholesky::FastDecompose(Matrix & A)
 14 |    {
 15 |    if (A.rows != A.cols)
 16 |       error("Cholesky.Decompose: Matrix %s is not square",
 17 |             (const char *) A.label);
 18 | 
 19 |    L.Dimension(A.rows, A.rows);
 20 | 
 21 |    for (int i=0; i<L.rows; i++)
 22 |       for (int j=i; j<L.rows; j++)
 23 |          {
 24 |          double sum = A.data[i]->data[j];
 25 |          for (int k = i - 1; k >= 0; k--)
 26 |             sum -= L.data[i]->data[k] * L.data[j]->data[k];
 27 |          if (i == j)
 28 |             if (sum <= 0.0)
 29 |                error("Cholesky - matrix %s is not positive definite",
 30 |                      (const char *) A.label);
 31 |             else
 32 |                L.data[i]->data[i] = sqrt(sum);
 33 |          else
 34 |             L.data[j]->data[i] = sum / L.data[i]->data[i];
 35 |          }
 36 |    }
 37 | 
 38 | bool Cholesky::TryDecompose(Matrix & A)
 39 |    {
 40 |    L.Dimension(A.rows, A.rows);
 41 |    L.Zero();
 42 | 
 43 |    if (A.rows != A.cols)
 44 |       return false;
 45 | 
 46 |    L.Dimension(A.rows, A.rows);
 47 | 
 48 |    for (int i=0; i<L.rows; i++)
 49 |       for (int j=i; j<L.rows; j++)
 50 |          {
 51 |          double sum = A[i][j];
 52 |          for (int k = i - 1; k >= 0; k--)
 53 |             sum -= L.data[i]->data[k] * L.data[j]->data[k];
 54 |          if (i == j)
 55 |             if (sum <= 0.0)
 56 |                return false;
 57 |             else
 58 |                L.data[i]->data[i] = sqrt(sum);
 59 |          else
 60 |             L.data[j]->data[i] = sum / L.data[i]->data[i];
 61 |          }
 62 | 
 63 |    return true;
 64 |    }
 65 |    
 66 | void Cholesky::BackSubst0(Vector & b)
 67 |    {
 68 |    x.Dimension(L.rows);
 69 | 
 70 |    // Solve L*v = b (store v in x)
 71 |    for (int i = 0; i < L.rows; i++)
 72 |       {
 73 |       double sum = b.data[i];
 74 |       for (int k = i-1; k>=0; k--)
 75 |          sum -= L.data[i]->data[k] * x.data[k];
 76 |       x.data[i] = sum / L.data[i]->data[i];
 77 |       }
 78 |    }
 79 | 
 80 | void Cholesky::BackSubst(Vector & b)
 81 |    {
 82 |    x.Dimension(L.rows);
 83 | 
 84 |    // Solve L*v = b (store v in x)
 85 |    for (int i = 0; i < L.rows; i++)
 86 |       {
 87 |       double sum = b[i];
 88 |       for (int k = i-1; k>=0; k--)
 89 |          sum -= L.data[i]->data[k] * x.data[k];
 90 |       x.data[i] = sum / L.data[i]->data[i];
 91 |       }
 92 | 
 93 |    // Solve transpose(L)*x = v
 94 |    // End result is ... A*x = L*t(L)*x = L*v = b
 95 |    for (int i=L.rows-1; i>=0; i--)
 96 |       {
 97 |       double sum = x[i];
 98 |       for (int k = i+1; k < L.rows; k++)
 99 |          sum -= L.data[k]->data[i] * x.data[k];
100 |       x.data[i] = sum / L.data[i]->data[i];
101 |       }
102 | 
103 |    // Done!
104 |    }
105 | 
106 | void Cholesky::Invert()
107 |    {
108 |    inv.Dimension(L.rows, L.rows);
109 | 
110 |    inv.Identity();
111 | 
112 |    for(int i = 0; i < L.rows; i++)
113 |       {
114 |       BackSubst(inv[i]);
115 |       inv[i] = x;
116 |       }
117 |    }
118 | 
119 | double Cholesky::lnDeterminantL()
120 |    {
121 |    double sum = 0;
122 |    for (int i = 0; i < L.rows; i++)
123 |       sum += log(L[i][i]);
124 |    return sum;
125 |    }
126 | 
127 | double Cholesky::DeterminantL()
128 |    {
129 |    double product = 1;
130 |    for (int i=0; i<L.rows; i++)
131 |       product *= L[i][i];
132 |    return product;
133 |    }
134 | 
135 | 


--------------------------------------------------------------------------------
/king/MerlinSort.cpp:
--------------------------------------------------------------------------------
 1 | ////////////////////////////////////////////////////////////////////// 
 2 | // merlin/MerlinSort.cpp 
 3 | // (c) 2000-2007 Goncalo Abecasis
 4 | // 
 5 | // This file is distributed as part of the MERLIN source code package   
 6 | // and may not be redistributed in any form, without prior written    
 7 | // permission from the author. Permission is granted for you to       
 8 | // modify this file for your own personal use, but modified versions  
 9 | // must retain this copyright notice and must not be distributed.     
10 | // 
11 | // Permission is granted for you to use this file to compile MERLIN.    
12 | // 
13 | // All computer programs have bugs. Use this file at your own risk.   
14 | // 
15 | // Tuesday December 18, 2007
16 | // 
17 |  
18 | #include "MerlinSort.h"
19 | #include "MathConstant.h"
20 | 
// Reorders the non-founder part of each family's traversal path so that
// individuals with higher informativeness scores come earlier, while every
// individual still follows both parents and any MZ co-twins of a parent.
// Updates path[] and each moved person's traverse index in place.
void SortFamilies(Pedigree & ped)
   {
   IntArray scores;

   for (int f = 0; f < ped.familyCount; f++)
      {
      int & founders = ped.families[f]->founders;
      int & count = ped.families[f]->count;
      int * & path = ped.families[f]->path;

      // Define a score for each individual based on...
      //     * No. of genotyped markers (primarily)
      //     * Affection status (tie-breaker)
      //
      // Only positions founders..count-1 are scored; founder positions
      // are never read below because new_pos never drops under founders.
      scores.Dimension(count);

      for (int i = founders; i < count; i++)
         scores[i] = ped[path[i]].ngeno * 2 + (ped.affectionCount == 0 ? 0 :
                     ped[path[i]].affections[0] == 2);

      // Optimize path so descendants with higher scores appear first
      // (an insertion pass: position i is moved up into the already
      // processed range founders..i-1)
      for (int i = founders + 1; i < count; i++)
         {
         // Non-founders must always follow founders
         int new_pos = founders;

         // In addition they must follow their father and any of his MZ twins
         Person & father = *ped[path[i]].father;

         if (father.traverse >= new_pos)
            new_pos = father.traverse + 1;

         // If father is an MZ twin, can't move this individual above his co-twins
         if (father.zygosity & 1)
            for (int j = 0; j < father.sibCount; j++)
               if (father.sibs[j]->zygosity == father.zygosity &&
                   father.sibs[j]->traverse >= new_pos)
                  new_pos = father.sibs[j]->traverse + 1;

         // In addition they must follow their mother and any of her MZ twins
         Person & mother = *ped[path[i]].mother;

         if (mother.traverse >= new_pos)
            new_pos = mother.traverse + 1;

         // If mother is an MZ twin, can't move this individual above her co-twins
         if (mother.zygosity & 1)
            for (int j = 0; j < mother.sibCount; j++)
               if (mother.zygosity == mother.sibs[j]->zygosity &&
                   mother.sibs[j]->traverse >= new_pos)
                  new_pos = mother.sibs[j]->traverse + 1;

         // Subject to these constraints, place individual above any others
         // with lower informativeness scores
         // (skips past entries whose score is strictly higher, so equal
         // scores do not block the move)
         while (scores[new_pos] > scores[i] && new_pos < i)
             new_pos++;

         if (new_pos != i)
            {
            int person_to_move = path[i];
            int saved_score = scores[i];

            // Shift entries new_pos..i-1 down by one slot, keeping each
            // shifted person's traverse index in step with its new slot
            for (int move = i; move > new_pos; move--)
               {
               scores[move] = scores[move-1];
               path[move] = path[move-1];
               ped[path[move]].traverse++;
               }

            // Drop the moved individual into the freed slot
            ped[person_to_move].traverse = new_pos;
            path[new_pos] = person_to_move;
            scores[new_pos] = saved_score;
            }
         }
      }
   }
97 | 
98 |  
99 | 


--------------------------------------------------------------------------------
/king/BasicHash.cpp:
--------------------------------------------------------------------------------
  1 | #include "BasicHash.h"
  2 | #include "Error.h"
  3 | 
  4 | #include <stdio.h>
  5 | 
  6 | BasicHash::BasicHash(int startsize)
  7 |    {
  8 |    count = 0;
  9 |    size  = startsize;
 10 |    mask  = startsize - 1;
 11 | 
 12 |    // In this implementation, the size of hash tables must be a power of two
 13 |    if (startsize & mask)
 14 |       error("BasicHash: Hash table size must be a power of two.\n");
 15 | 
 16 |    objects = new void * [size];
 17 |    keys    = new unsigned int [size];
 18 | 
 19 |    for (unsigned int i = 0; i < size; i++)
 20 |       { objects[i] = NULL; }
 21 |    };
 22 | 
 23 | BasicHash::~BasicHash()
 24 |    {
 25 |    delete [] objects;
 26 |    delete [] keys;
 27 |    }
 28 | 
 29 | void BasicHash::Clear()
 30 |    {
 31 | //   printf("Clearing...\n");
 32 | 
 33 |    count = 0;
 34 | 
 35 |    if (size > 16)
 36 |       SetSize(16);
 37 | 
 38 |    for (unsigned int i = 0; i < size; i++)
 39 |       objects[i] = NULL;
 40 |    }
 41 | 
 42 | void BasicHash::SetSize(int newsize)
 43 |    {
 44 |    int newmask = newsize - 1;
 45 | 
 46 |    void     ** newobjects = new void * [newsize];
 47 |    unsigned int * newkeys = new unsigned int [newsize];
 48 | 
 49 |    for (int i = 0; i < newsize; i++)
 50 |        { newobjects[i] = NULL; }
 51 | 
 52 |    if (count)
 53 |       for (unsigned int i = 0; i < size; i++)
 54 |          if (objects[i] != NULL)
 55 |             {
 56 |             unsigned int key = keys[i];
 57 |             unsigned int h   = key & newmask;
 58 | 
 59 |             while ( newobjects[h] != NULL && newkeys[h] != h)
 60 |                h = (h + 1) & newmask;
 61 | 
 62 |             newkeys[h] = key;
 63 |             newobjects[h] = objects[i];
 64 |             }
 65 | 
 66 |    delete [] objects;
 67 |    delete [] keys;
 68 | 
 69 |    objects = newobjects;
 70 |    keys = newkeys;
 71 |    size = newsize;
 72 |    mask = newmask;
 73 |    }
 74 | 
 75 | int BasicHash::Add(int key, void * object)
 76 |    {
 77 |    if (count * 2 > size)
 78 |       Grow();
 79 | 
 80 |    unsigned int h = Iterate(key);
 81 | 
 82 |    while ((objects[h] != NULL) && (objects[h] != object))
 83 |       h = ReIterate(key, h);
 84 | 
 85 |    if (objects[h] == NULL)
 86 |       {
 87 | //      printf("At position %d, inserted %x\n", h, key);
 88 |       keys[h] = key;
 89 |       count++;
 90 |       }
 91 | 
 92 |    objects[h] = object;
 93 | 
 94 |    return h;
 95 |    }
 96 | 
 97 | int BasicHash::Find(int key)
 98 |    {
 99 |    int h = Iterate(key);
100 | 
101 |    return objects[h] == NULL ? -1 : h;
102 |    }
103 | 
104 | int BasicHash::Rehash(int key, int h)
105 |    {
106 |    h = ReIterate(key, h);
107 | 
108 |    return objects[h] == NULL ? -1 : h;
109 |    }
110 | 
111 | void BasicHash::Delete(unsigned int index)
112 |    {
113 |    if (index >= size || objects[index] == NULL)
114 |       return;
115 | 
116 |    objects[index] = NULL;
117 |    count--;
118 | 
119 |    if (count * 8 < size && size > 32)
120 |       Shrink();
121 |    else
122 |       {
123 |       // rehash the next entries until we find empty slot
124 |       index = (index + 1) & mask;
125 | 
126 |       while (objects[index] != NULL)
127 |          {
128 |          if ((keys[index] & mask) != index)
129 |             {
130 |             unsigned int h = Iterate(keys[index]);
131 | 
132 |             while ((objects[h] != NULL) && (objects[h] != objects[index]))
133 |                h = ReIterate(keys[index], h);
134 | 
135 |             if (h != (unsigned int) index)
136 |                {
137 |                keys[h] = keys[index];
138 |                objects[h] = objects[index];
139 |                objects[index] = NULL;
140 |                }
141 |             }
142 | 
143 |          index = (index + 1) & mask;
144 |          }
145 |       }
146 |    }
147 | 


--------------------------------------------------------------------------------
/king/StringArray.h:
--------------------------------------------------------------------------------
  1 | ////////////////////////////////////////////////////////////////////// 
  2 | // libsrc/StringArray.h 
  3 | // (c) 2000-2007 Goncalo Abecasis
  4 | // 
  5 | // This file is distributed as part of the MERLIN source code package   
  6 | // and may not be redistributed in any form, without prior written    
  7 | // permission from the author. Permission is granted for you to       
  8 | // modify this file for your own personal use, but modified versions  
  9 | // must retain this copyright notice and must not be distributed.     
 10 | // 
 11 | // Permission is granted for you to use this file to compile MERLIN.    
 12 | // 
 13 | // All computer programs have bugs. Use this file at your own risk.   
 14 | // 
 15 | // Tuesday December 18, 2007
 16 | // 
 17 |  
 18 | #ifndef __STRING_ARRAY_H__
 19 | #define __STRING_ARRAY_H__
 20 | 
 21 | #include "StringBasics.h"
 22 | 
// Growable array of String objects held through an array of pointers.
class StringArray
   {
   protected:
      // Array of pointers to the individual strings
      String ** strings;
      // count is the number of elements reported by Length();
      // size is presumably the allocated capacity -- confirm in the .cpp
      int size, count;

   public:
      // NOTE(review): presumably the allocation increment -- confirm in the .cpp
      static int alloc;

      StringArray(int startsize = 0);
      StringArray(StringArray & original);
      virtual ~StringArray();

      // Each line in a file is parsed into a separate array element
      //

      void Read(FILE * f);
      void Write(FILE * f);
      void WriteLine(FILE * f);
      void Read(const char * filename);
      void Write(const char * filename);
      void WriteLine(const char * filename);

#ifdef __ZLIB_AVAILABLE__
      void Read(IFILE & f);
#endif

      // Write all strings to the screen
      void Print();
      void PrintLine();

      void Grow(int newsize);
      void Clear();

      // Number of elements currently in the array
      int Length() const { return count; }
      int Dimension(int newcount);
      int CharLength();

      // Element access; the index is not range-checked here
      String & operator [] (int i) { return *(strings[i]); }
      const String & operator [] (int i) const { return *(strings[i]); }

      // These functions divide a string into tokens and append these to the
      // array. Return value is the new array length
      //

      int AddColumns(const String & s, char ch = '\t');
      int AddTokens(const String & s, char ch);
      int AddTokens(const String & s, const String & separators = " \t\r\n");

      // Same as the Add... versions above, but the array is cleared first
      int ReplaceColumns(const String & s, char ch = '\t')
         { Clear(); return AddColumns(s, ch); }
      int ReplaceTokens(const String & s, const String & separators = " \t\r\n")
         { Clear(); return AddTokens(s, separators); }

      // These functions add, insert or remove a single array element
      //

      int  Add(const String & s);
      void InsertAt(int position, const String & s);
      void Delete(int position);

      // These functions treat the array as a stack (Push is an alias for Add)
      //

      String & Last() const;
      int      Push(const String & s) { return Add(s); }
      String   Pop();

      // Linear search (N/2 comparisons on average) for a single element
      // If searching is required, StringMaps are a better option
      //

      int Find(const String & s) const;
      int FastFind(const String & s) const;
      int SlowFind(const String & s) const;

      // Alphabetically orders strings
      //
      void Sort();

      // Trims strings to remove whitespace
      void Trim();

      StringArray & operator = (const StringArray & rhs);

      // Comparison; != is defined as the negation of ==
      bool operator == (const StringArray & rhs);
      bool operator != (const StringArray & rhs)
         { return !(*this == rhs); }

   private:
      // Comparison callback with a qsort-style signature, presumably used by Sort()
      static int ComparisonForSort(const void * a, const void * b);
   };
115 | 
116 | #endif
117 | 
118 |  
119 | 


--------------------------------------------------------------------------------
/king/InputFile.h:
--------------------------------------------------------------------------------
  1 | ////////////////////////////////////////////////////////////////////// 
  2 | // libsrc/InputFile.h 
  3 | // (c) 2000-2007 Goncalo Abecasis
  4 | // 
  5 | // This file is distributed as part of the MERLIN source code package   
  6 | // and may not be redistributed in any form, without prior written    
  7 | // permission from the author. Permission is granted for you to       
  8 | // modify this file for your own personal use, but modified versions  
  9 | // must retain this copyright notice and must not be distributed.     
 10 | // 
 11 | // Permission is granted for you to use this file to compile MERLIN.    
 12 | // 
 13 | // All computer programs have bugs. Use this file at your own risk.   
 14 | // 
 15 | // Tuesday December 18, 2007
 16 | // 
 17 |  
 18 | #ifndef __INPUTFILE_H__
 19 | #define __INPUTFILE_H__
 20 | 
 21 | #ifdef  __gnu_linux__
 22 | #ifndef __ZLIB_AVAILABLE__
 23 | #define __ZLIB_AVAILABLE__
 24 | #endif
 25 | #endif
 26 | 
 27 | #ifdef  __ZLIB_AVAILABLE__
 28 | 
 29 | #include <zlib.h>
 30 | #include <stdio.h>
 31 | 
 32 | class IFILE
 33 |    {
 34 |    public:
 35 |       bool gzMode;
 36 |       union
 37 |          {
 38 |          gzFile gzHandle;
 39 |          FILE * handle;
 40 |          };
 41 | 
 42 |    IFILE()
 43 |       {
 44 |       gzMode = false;
 45 |       handle = NULL;
 46 |       }
 47 | 
 48 |    IFILE(const char * filename, const char * mode);
 49 | 
 50 |    operator void * ()
 51 |       { return gzMode ? (void *) gzHandle : (void *) handle; }
 52 | 
 53 |    IFILE operator = (const IFILE & rhs)
 54 |       {
 55 |       if ((gzMode = rhs.gzMode) == true)
 56 |          gzHandle = rhs.gzHandle;
 57 |       else
 58 |          handle = rhs.handle;
 59 | 
 60 |       return *this;
 61 |       }
 62 | 
 63 |    IFILE operator = (FILE * rhs)
 64 |       {
 65 |       gzMode = false;
 66 |       handle = rhs;
 67 |       return *this;
 68 |       }
 69 | 
 70 |    IFILE operator = (gzFile & rhs)
 71 |       {
 72 |       gzMode = true;
 73 |       gzHandle = rhs;
 74 |       return *this;
 75 |       }
 76 | 
 77 |    bool operator == (void * rhs)
 78 |       {
 79 |       if (rhs != NULL)
 80 |          return false;
 81 |       return gzMode ? gzHandle == rhs : handle == rhs;
 82 |       }
 83 |    };
 84 | 
 85 | inline IFILE ifopen(const char * filename, const char * mode)
 86 |    { IFILE file(filename, mode); return file; }
 87 | 
 88 | inline int ifclose(IFILE & file)
 89 |    { return file.gzMode ? gzclose(file.gzHandle) : fclose(file.handle); }
 90 | 
 91 | inline int ifgetc(IFILE & file)
 92 |    { return file.gzMode ? gzgetc(file.gzHandle) : fgetc(file.handle); }
 93 | 
 94 | inline void ifrewind(IFILE & file)
 95 |    { if (file.gzMode) gzrewind(file.gzHandle); else rewind(file.handle); }
 96 | 
 97 | inline int ifeof(IFILE & file)
 98 |    { return file.gzMode ? gzeof(file.gzHandle) : feof(file.handle); }
 99 | 
100 | #else
101 | 
102 | #include <stdio.h>
103 | 
// Thin wrapper around a stdio FILE *, used when zlib is not available.
class IFILE
   {
   public:
      FILE * handle;

      // No file attached
      IFILE() : handle(NULL)
         { }

      // Opens filename with fopen(); handle is NULL if that fails
      IFILE(const char * filename, const char * mode)
         : handle(fopen(filename, mode))
         { }

      // Implicit access to the wrapped stream
      operator FILE *()
         {
         return handle;
         }

      // Attach a stdio stream
      IFILE & operator = (FILE * rhs)
         {
         handle = rhs;
         return *this;
         }

      // Share the stream held by another IFILE
      IFILE & operator = (const IFILE & rhs)
         {
         handle = rhs.handle;
         return *this;
         }

      // Only comparison against NULL is supported: true exactly when both
      // the argument and the wrapped handle are NULL.
      bool operator == (void * rhs)
         {
         return rhs == NULL && handle == NULL;
         }
   };
130 | 
131 | inline IFILE ifopen(const char * filename, const char * mode)
132 |    { IFILE file(filename, mode); return file; }
133 | 
134 | inline int ifclose(IFILE & file)
135 |    { return fclose(file.handle); }
136 | 
137 | inline int ifgetc(IFILE & file)
138 |    { return fgetc(file.handle); }
139 | 
140 | inline void ifrewind(IFILE & file)
141 |    { rewind(file.handle); }
142 | 
143 | inline int ifeof(IFILE & file)
144 |    { return feof(file.handle); }
145 | 
146 | #endif
147 | 
148 | #endif
149 | 
150 |  
151 | 


--------------------------------------------------------------------------------
/king/StringMap.h:
--------------------------------------------------------------------------------
  1 | ////////////////////////////////////////////////////////////////////// 
  2 | // libsrc/StringMap.h 
  3 | // (c) 2000-2007 Goncalo Abecasis
  4 | // 
  5 | // This file is distributed as part of the MERLIN source code package   
  6 | // and may not be redistributed in any form, without prior written    
  7 | // permission from the author. Permission is granted for you to       
  8 | // modify this file for your own personal use, but modified versions  
  9 | // must retain this copyright notice and must not be distributed.     
 10 | // 
 11 | // Permission is granted for you to use this file to compile MERLIN.    
 12 | // 
 13 | // All computer programs have bugs. Use this file at your own risk.   
 14 | // 
 15 | // Tuesday December 18, 2007
 16 | // 
 17 |  
 18 | #ifndef __STRINGMAP_H__
 19 | #define __STRINGMAP_H__
 20 | 
 21 | #include "StringBasics.h"
 22 | 
// Associates String keys with untyped object pointers, stored in two
// parallel arrays (strings[i] is the key for objects[i]).
class StringMap
   {
   protected:
      ::String ** strings;
      void     ** objects;
      // count is the number of entries reported by Length();
      // size is presumably the allocated capacity -- confirm in the .cpp
      int         count, size;

   public:
      // NOTE(review): presumably the allocation increment -- confirm in the .cpp
      static int alloc;

      StringMap(int startsize = 0);
      virtual ~StringMap();

      void Grow(int newsize);
      void Clear();
      int  Length() const { return count; }

      // Object stored at position i (index is not range-checked here)
      void * Object(int i) const { return objects[i]; }
      // Object stored under key, or NULL when Find() reports a negative index
      void * Object(const ::String & key) const
         {
         int index = Find(key);
         return (index >= 0) ? objects[index] : NULL;
         }
      // Uses the index from Find(key, create_object) without checking it;
      // presumably that overload always yields a valid index -- TODO confirm
      void * Object(const ::String & key, void * (*create_object)())
         { return objects[Find(key, create_object)]; }

      void SetObject(int i, void * object)
         { objects[i] = object; }
      // Forwards to Add()
      void SetObject(const ::String & key, void * object)
         { Add(key, object); }

      int Add(const ::String & s, void * object = NULL);
      int Find(const ::String & s, void * (*create_object)() = NULL);
      int Find(const ::String & s) const;
      int FindStem(const ::String & stem) const;

      StringMap & operator = (const StringMap & rhs);

      // Key stored at position i (index is not range-checked here)
      const ::String & operator [] (int i) const { return *(strings[i]); }
      ::String & operator [] (int i) { return *(strings[i]); }
      ::String & String(int i) { return *(strings[i]); }

      // Factory with the void * (*)() signature accepted by Find() / Object()
      static void * CreateMap();

      void Delete(int index);
   };
 69 | 
// Associates String keys with integer values, stored in two parallel
// arrays (strings[i] is the key for integers[i]).
class StringIntMap
   {
   protected:
      ::String ** strings;
      int       * integers;
      // count is the number of entries reported by Length();
      // size is presumably the allocated capacity -- confirm in the .cpp
      int         count, size;

   public:
      // NOTE(review): presumably the allocation increment -- confirm in the .cpp
      static int alloc;

      StringIntMap(int startsize = 0);
      virtual ~StringIntMap();

      void Grow(int newsize);
      void Clear();
      int  Length() const { return count; }

      // Value stored at position i (index is not range-checked here)
      int Integer(int i) const { return integers[i]; }
      // Value stored under key, or -1 when Find() reports a negative index;
      // a stored value of -1 is therefore indistinguishable from "not found"
      int Integer(const ::String & key) const
         {
         int index = Find(key);
         return (index >= 0) ? (int) integers[index] : -1;
         }

      void SetInteger(int i, int value)
         { integers[i] = value; }
      // Forwards to Add()
      void SetInteger(const ::String & key, int value)
         { Add(key, value); }

      int Add(const ::String & s, int i);
      int Find(const ::String & s, int defaultValue);
      int Find(const ::String & s) const;
      int FindStem(const ::String & stem) const;

      StringIntMap & operator = (const StringIntMap & rhs);

      // Key stored at position i (index is not range-checked here)
      const ::String & operator [] (int i) const { return *(strings[i]); }
      ::String & operator [] (int i) { return *(strings[i]); }
      ::String & String(int i) { return *(strings[i]); }

      static void * CreateMap();

      // Counter-style access to the stored integers
      int IncrementCount(const ::String & key);
      int DecrementCount(const ::String & key);
      int GetCount(const ::String & key) const;
      int GetCount(int index) const { return integers[index]; }

      void Delete(int index);
   };
119 | 
120 | #endif
121 | 
122 |  
123 | 


--------------------------------------------------------------------------------
/king/Hash.cpp:
--------------------------------------------------------------------------------
  1 | #include "Hash.h"
  2 | 
  3 | #include <ctype.h>
  4 | 
  5 | // ********************************************************
  6 | //
  7 | // This code is based on the original by Robert Jenkins.
  8 | //
  9 | // http://burtleburtle.net/bob/hash/doobs.html
 10 | //
 11 | // ********************************************************
 12 | 
// Mixes three unsigned integers in place using subtract / xor / shift
// rounds. The arguments are modified directly, so they must be plain
// variables (each one is evaluated many times).
#define MIX_INTEGERS(a,b,c) \
    { \
    a -= b; a -= c; a ^= (c>>13); \
    b -= c; b -= a; b ^= (a<<8);  \
    c -= a; c -= b; c ^= (b>>13); \
    a -= b; a -= c; a ^= (c>>12); \
    b -= c; b -= a; b ^= (a<<16); \
    c -= a; c -= b; c ^= (b>>5);  \
    a -= b; a -= c; a ^= (c>>3);  \
    b -= c; b -= a; b ^= (a<<10); \
    c -= a; c -= b; c ^= (b>>15); \
    }

// Shorthand cast: ui(x) expands to (unsigned int)(x)
#define ui   (unsigned int)
 27 | 
 28 | unsigned int hash ( const unsigned char * key, unsigned int length, unsigned int initval)
 29 |    {
 30 |    unsigned int a = 0x9e3779b9;
 31 |    unsigned int b = 0x9e3779b9;
 32 |    unsigned int c = initval;
 33 |    unsigned int len = length;
 34 | 
 35 |    /*---------------------------------------- handle most of the key */
 36 |    while (len >= 12)
 37 |       {
 38 |       a += (key[0] +(ui(key[1])<<8) +(ui(key[2])<<16) +(ui(key[3])<<24));
 39 |       b += (key[4] +(ui(key[5])<<8) +(ui(key[6])<<16) +(ui(key[7])<<24));
 40 |       c += (key[8] +(ui(key[9])<<8) +(ui(key[10])<<16)+(ui(key[11])<<24));
 41 |       MIX_INTEGERS(a,b,c);
 42 |       key += 12; len -= 12;
 43 |       }
 44 | 
 45 |    /*------------------------------------- handle the last 11 bytes */
 46 |    c += length;
 47 |    switch(len)              /* all the case statements fall through */
 48 |       {
 49 |       case 11: c+=(ui(key[10])<<24);
 50 |       case 10: c+=(ui(key[9])<<16);
 51 |       case 9 : c+=(ui(key[8])<<8);
 52 |       /* the first byte of c is reserved for the length */
 53 | 
 54 |       case 8 : b+=(ui(key[7])<<24);
 55 |       case 7 : b+=(ui(key[6])<<16);
 56 |       case 6 : b+=(ui(key[5])<<8);
 57 |       case 5 : b+=key[4];
 58 | 
 59 |       case 4 : a+=(ui(key[3])<<24);
 60 |       case 3 : a+=(ui(key[2])<<16);
 61 |       case 2 : a+=(ui(key[1])<<8);
 62 |       case 1 : a+=key[0];
 63 |      /* case 0: nothing left to add */
 64 |    }
 65 |    MIX_INTEGERS(a,b,c);
 66 | 
 67 |    /*-------------------------------------------- report the result */
 68 |    return c;
 69 |    }
 70 | 
 71 | unsigned int hash_no_case ( const unsigned char * key, unsigned int length, unsigned int initval)
 72 |    {
 73 |    unsigned int a = 0x9e3779b9;
 74 |    unsigned int b = 0x9e3779b9;
 75 |    unsigned int c = initval;
 76 |    unsigned int len = length;
 77 | 
 78 |    /*---------------------------------------- handle most of the key */
 79 |    while (len >= 12)
 80 |       {
 81 |       a += (toupper(key[0]) +(ui(toupper(key[1]))<<8) +(ui(toupper(key[2]))<<16) +(ui(toupper(key[3]))<<24));
 82 |       b += (toupper(key[4]) +(ui(toupper(key[5]))<<8) +(ui(toupper(key[6]))<<16) +(ui(toupper(key[7]))<<24));
 83 |       c += (toupper(key[8]) +(ui(toupper(key[9]))<<8) +(ui(toupper(key[10]))<<16)+(ui(toupper(key[11]))<<24));
 84 |       MIX_INTEGERS(a,b,c);
 85 |       key += 12; len -= 12;
 86 |       }
 87 | 
 88 |    /*------------------------------------- handle the last 11 bytes */
 89 |    c += length;
 90 |    switch(len)              /* all the case statements fall through */
 91 |       {
 92 |       case 11: c+=(ui(toupper(key[10]))<<24);
 93 |       case 10: c+=(ui(toupper(key[9]))<<16);
 94 |       case 9 : c+=(ui(toupper(key[8]))<<8);
 95 |       /* the first byte of c is reserved for the length */
 96 | 
 97 |       case 8 : b+=(ui(toupper(key[7]))<<24);
 98 |       case 7 : b+=(ui(toupper(key[6]))<<16);
 99 |       case 6 : b+=(ui(toupper(key[5]))<<8);
100 |       case 5 : b+=toupper(key[4]);
101 | 
102 |       case 4 : a+=(ui(toupper(key[3]))<<24);
103 |       case 3 : a+=(ui(toupper(key[2]))<<16);
104 |       case 2 : a+=(ui(toupper(key[1]))<<8);
105 |       case 1 : a+=toupper(key[0]);
106 |      /* case 0: nothing left to add */
107 |    }
108 |    MIX_INTEGERS(a,b,c);
109 | 
110 |    /*-------------------------------------------- report the result */
111 |    return c;
112 |    }
113 | 


--------------------------------------------------------------------------------
/king/IntArray.h:
--------------------------------------------------------------------------------
  1 | #ifndef __INTARRAY_H__
  2 | #define __INTARRAY_H__
  3 | 
  4 | #include <stdio.h>
  5 | 
  6 | class IntArray   // Resizable array of int; members declared without a body here are defined outside this header
  7 |    {
  8 |    private:
  9 |       int * items;        // element storage; operator [] indexes straight into it
 10 |       int size, count;    // count is what Length() and Count() report; size is presumably the allocated capacity (confirm in the .cpp)
 11 | 
 12 |       void Grow(int new_size);
 13 |       static int Compare(int * a, int * b);
 14 | 
 15 |    public:
 16 |       static int alloc;
 17 | 
 18 |       IntArray(int start_size = 0);
 19 |       IntArray(const IntArray & source);
 20 |       ~IntArray();
 21 | 
 22 |       IntArray & operator = (const IntArray & rhs);
 23 |       // Positional access; neither overload checks index against count
 24 |       int & operator [] (int index) { return items[index]; }
 25 |       int   operator [] (int index) const { return items[index]; }
 26 |       // Fractional access: element (int) (count * fraction); a fraction of 1.0 maps to index count, one past the last item
 27 |       int & operator [] (double fraction)
 28 |          { return items[(int) (count * fraction)]; }
 29 |       int   operator [] (double fraction) const
 30 |          { return items[(int) (count * fraction)]; }
 31 | 
 32 |       int  Append(int value);
 33 |       int  Append(const IntArray & rhs);
 34 |       // Stack-style helpers; Pop, Peek and Last do not check for an empty array
 35 |       void Push(int value)          { Append(value); }
 36 |       int  Pop()                    { return items[--count]; }
 37 |       int  Peek() const             { return items[count - 1]; }
 38 |       int &Last() const             { return items[count - 1]; }
 39 | 
 40 |       void PushIfNew(int value);    // used for maintaining list without duplicates
 41 | 
 42 |       int  Delete(int index);
 43 |       void InsertAt(int index, int value);
 44 | 
 45 |       int  Find(int value) const;
 46 |       int  FastFind(int value) const { return BinarySearch(value); }   // alias for BinarySearch
 47 |       int  BinarySearch(int value) const;
 48 |       void Sort();
 49 | 
 50 |       void Zero();
 51 |       void Set(int value);
 52 |       void SetSequence(int start = 0, int increment = 1);
 53 |       // Dimension() calls Grow() and then sets count; Clear() only resets count to zero
 54 |       int  Length() const           { return count; }
 55 |       void Dimension(int new_count) { Grow(new_count); count = new_count; }
 56 |       void Clear()                  { count = 0; }
 57 |       // The shorter Sum/Max/Min overloads forward to the (start, end) form with end = count - 1
 58 |       int  Sum() const              { return Sum(0, count - 1); }
 59 |       int  Sum(int start) const     { return Sum(start, count - 1); }
 60 |       int  Sum(int start, int end) const;
 61 | 
 62 |       int  Max() const              { return Max(0, count - 1);     }
 63 |       int  Max(int start) const     { return Max(start, count - 1); }
 64 |       int  Max(int start, int end) const;
 65 | 
 66 |       int  Min() const              { return Min(0, count - 1);     }
 67 |       int  Min(int start) const     { return Min(start, count - 1); }
 68 |       int  Min(int start, int end) const;
 69 | 
 70 |       int  Count() const            {return count; }
 71 |       int  CountIfGreater(int treshold) const;
 72 |       int  CountIfGreaterOrEqual(int treshold) const;
 73 |       // Exchanges the items at positions i and j (no bounds checking)
 74 |       void Swap(int i, int j)
 75 |            { int tmp = items[i]; items[i] = items[j]; items[j] = tmp; }
 76 | 
 77 |       void Reverse();
 78 |       // Implicit conversion that exposes the raw items pointer
 79 |       operator int * ()               { return items; }
 80 |       // In-place arithmetic; Subtract(term) is implemented as Add(-term)
 81 |       void Add(int term);
 82 |       void Subtract(int term) { Add(-term); }
 83 |       void Multiply(int factor);
 84 |       void Divide(int denominator);
 85 |       // Compound-assignment operators forward to Add / Multiply / Divide and return *this
 86 |       IntArray & operator += (int rhs)
 87 |          { Add(rhs); return *this; }
 88 | 
 89 |       IntArray & operator *= (int rhs)
 90 |          { Multiply(rhs); return *this; }
 91 | 
 92 |       IntArray & operator -= (int rhs)
 93 |          { Add(-rhs); return *this; }
 94 | 
 95 |       IntArray & operator /= (int rhs)
 96 |          { Divide(rhs); return *this; }
 97 | 
 98 |       int  InnerProduct(IntArray & v);
 99 | 
100 |       bool operator == (const IntArray & rhs) const;
101 |       bool operator != (const IntArray & rhs) const;
102 | 
103 |       bool isAscending();
104 |       bool isDescending();
105 | 
106 |       void Stack(const IntArray & rhs);
107 | 
108 |       void Swap(IntArray & rhs);
109 |       // Print overloads without a FILE * argument write to stdout
110 |       void Print()                   { Print(stdout); }
111 |       void Print(const char * label) { Print(stdout, label); }
112 |       void Print(FILE * output);
113 |       void Print(FILE * output, const char * label);
114 | 
115 |       int    Product();
116 |       double DoubleProduct();
117 | 
118 |       int  Hash(int initval = 0);
119 |    };
120 | 
121 | #endif
122 | 
123 | 
124 | 


--------------------------------------------------------------------------------
/king/Random.h:
--------------------------------------------------------------------------------
  1 | 
  2 | //////////////////////////////////////////////////////////////////////////////
  3 | // This file includes code derived from the original Mersenne Twister Code
  4 | // by Makoto Matsumoto and Takuji Nishimura
  5 | // and is subject to their original copyright notice copied below:
  6 | //////////////////////////////////////////////////////////////////////////////
  7 | 
  8 | //////////////////////////////////////////////////////////////////////////////
  9 | //              COPYRIGHT NOTICE FOR MERSENNE TWISTER CODE
 10 | //
 11 | // Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
 12 | // All rights reserved.
 13 | //
 14 | //   Redistribution and use in source and binary forms, with or without
 15 | //   modification, are permitted provided that the following conditions
 16 | //   are met:
 17 | //
 18 | //     1. Redistributions of source code must retain the above copyright
 19 | //        notice, this list of conditions and the following disclaimer.
 20 | //
 21 | //     2. Redistributions in binary form must reproduce the above copyright
 22 | //        notice, this list of conditions and the following disclaimer in the
 23 | //        documentation and/or other materials provided with the distribution.
 24 | //
 25 | //     3. The names of its contributors may not be used to endorse or promote
 26 | //        products derived from this software without specific prior written
 27 | //        permission.
 28 | //
 29 | //   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 30 | //   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 31 | //   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 32 | //   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 33 | //   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 34 | //   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 35 | //   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 36 | //   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 37 | //   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 38 | //   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 39 | //   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 40 | //
 41 | ///////////////////////////////////////////////////////////////////////////////
 42 | 
 43 | 
 44 | #ifndef __RANDOM_H__
 45 | #define __RANDOM_H__
 46 | 
 47 | // Define a quick and dirty generator
 48 | #define RANDMUL 1664525L
 49 | #define RANDADD 1013904223L
 50 | // RAND(seed) assigns seed * RANDMUL + RANDADD back to seed and yields that value masked with 0xFFFFFFFF
 51 | #define RAND(seed) ((seed = seed * RANDMUL + RANDADD) & 0xFFFFFFFF)
 52 | 
 53 | class Random
 54 | // Implements the Mersenne Twister as default random number generator.
 55 | // Compilation flag __NO_MERSENNE sets default generator to
 56 | // a minimal Park-Miller with Bays-Durham shuffle and added safe guards.
 57 |    {
 58 |    protected:
 59 |       // values for "minimal random values"
 60 |       long  seed;
 61 |       long  last;
 62 |       long  * shuffler;
 63 | 
 64 |       // and for normal deviates
 65 |       int      normSaved;
 66 |       double   normStore;
 67 | 
 68 |       double mersenneMult;
 69 | 
 70 |       // Array for Mersenne state vector
 71 |       unsigned long * mt;
 72 | 
 73 |       // Used to signal that Mersenne state vector is not initialized
 74 |       int mti;
 75 | 
 76 | 
 77 |    public:
 78 |       // The default seed is 0x7654321 when none is supplied
 79 |       Random(long s = 0x7654321);
 80 |       ~Random();
 81 | 
 82 |       // Next bit in series of 0s and 1s
 83 |       int    Binary();
 84 | 
 85 |       // Next value in series, between 0 and 1
 86 |       double Next();
 87 | 
 88 |       // Next integer
 89 |       unsigned long NextInt();
 90 | 
 91 |       // Random number from N(0,1)
 92 |       double Normal();
 93 | 
 94 |       void   Reset(long s);
 95 |       void   InitMersenne(unsigned long s);
 96 | 
 97 |       // Random number between 0 and 1 (implicit conversion that calls Next())
 98 |       operator double()
 99 |          { return Next(); }
100 | 
101 |       // Random number between arbitrary bounds, computed as lo + (hi - lo) * Next()
102 |       double Uniform(double lo = 0.0, double hi = 1.0)
103 |          {
104 |          return lo + (hi - lo) * Next();
105 |          }
106 |       // NOTE(review): Choose presumably fills array with k picks out of n, optionally weighted; confirm against the implementation
107 |       void Choose(int * array, int n, int k);
108 |       void Choose(int * array, float * weights, int n, int k);
109 | 
110 |    };
111 | 
112 | extern Random globalRandom;
113 | 
114 | #endif
115 | 
116 | 


--------------------------------------------------------------------------------
/examples/index/list.107.local.crams.index:
--------------------------------------------------------------------------------
  1 | NWD100953	crams/NWD100953.recab.cram
  2 | NWD119836	crams/NWD119836.recab.cram
  3 | NWD119844	crams/NWD119844.recab.cram
  4 | NWD136397	crams/NWD136397.recab.cram
  5 | NWD146103	crams/NWD146103.recab.cram
  6 | NWD155824	crams/NWD155824.recab.cram
  7 | NWD165827	crams/NWD165827.recab.cram
  8 | NWD176325	crams/NWD176325.recab.cram
  9 | NWD183321	crams/NWD183321.recab.cram
 10 | NWD191048	crams/NWD191048.recab.cram
 11 | NWD230091	crams/NWD230091.recab.cram
 12 | NWD231092	crams/NWD231092.recab.cram
 13 | NWD234815	crams/NWD234815.recab.cram
 14 | NWD245311	crams/NWD245311.recab.cram
 15 | NWD259170	crams/NWD259170.recab.cram
 16 | NWD263474	crams/NWD263474.recab.cram
 17 | NWD285363	crams/NWD285363.recab.cram
 18 | NWD290849	crams/NWD290849.recab.cram
 19 | NWD293295	crams/NWD293295.recab.cram
 20 | NWD296991	crams/NWD296991.recab.cram
 21 | NWD298195	crams/NWD298195.recab.cram
 22 | NWD315195	crams/NWD315195.recab.cram
 23 | NWD315403	crams/NWD315403.recab.cram
 24 | NWD316026	crams/NWD316026.recab.cram
 25 | NWD319341	crams/NWD319341.recab.cram
 26 | NWD373853	crams/NWD373853.recab.cram
 27 | NWD422016	crams/NWD422016.recab.cram
 28 | NWD428511	crams/NWD428511.recab.cram
 29 | NWD433038	crams/NWD433038.recab.cram
 30 | NWD434806	crams/NWD434806.recab.cram
 31 | NWD444824	crams/NWD444824.recab.cram
 32 | NWD446684	crams/NWD446684.recab.cram
 33 | NWD455342	crams/NWD455342.recab.cram
 34 | NWD463423	crams/NWD463423.recab.cram
 35 | NWD465900	crams/NWD465900.recab.cram
 36 | NWD470340	crams/NWD470340.recab.cram
 37 | NWD479955	crams/NWD479955.recab.cram
 38 | NWD480514	crams/NWD480514.recab.cram
 39 | NWD490850	crams/NWD490850.recab.cram
 40 | NWD492101	crams/NWD492101.recab.cram
 41 | NWD495157	crams/NWD495157.recab.cram
 42 | NWD496530	crams/NWD496530.recab.cram
 43 | NWD502718	crams/NWD502718.recab.cram
 44 | NWD512755	crams/NWD512755.recab.cram
 45 | NWD518110	crams/NWD518110.recab.cram
 46 | NWD524030	crams/NWD524030.recab.cram
 47 | NWD535753	crams/NWD535753.recab.cram
 48 | NWD548310	crams/NWD548310.recab.cram
 49 | NWD578417	crams/NWD578417.recab.cram
 50 | NWD578584	crams/NWD578584.recab.cram
 51 | NWD580039	crams/NWD580039.recab.cram
 52 | NWD585842	crams/NWD585842.recab.cram
 53 | NWD611564	crams/NWD611564.recab.cram
 54 | NWD614349	crams/NWD614349.recab.cram
 55 | NWD626020	crams/NWD626020.recab.cram
 56 | NWD626736	crams/NWD626736.recab.cram
 57 | NWD635737	crams/NWD635737.recab.cram
 58 | NWD651125	crams/NWD651125.recab.cram
 59 | NWD651283	crams/NWD651283.recab.cram
 60 | NWD651359	crams/NWD651359.recab.cram
 61 | NWD652050	crams/NWD652050.recab.cram
 62 | NWD672633	crams/NWD672633.recab.cram
 63 | NWD677194	crams/NWD677194.recab.cram
 64 | NWD684137	crams/NWD684137.recab.cram
 65 | NWD697767	crams/NWD697767.recab.cram
 66 | NWD704221	crams/NWD704221.recab.cram
 67 | NWD714003	crams/NWD714003.recab.cram
 68 | NWD716220	crams/NWD716220.recab.cram
 69 | NWD725484	crams/NWD725484.recab.cram
 70 | NWD742333	crams/NWD742333.recab.cram
 71 | NWD746396	crams/NWD746396.recab.cram
 72 | NWD754590	crams/NWD754590.recab.cram
 73 | NWD759405	crams/NWD759405.recab.cram
 74 | NWD760327	crams/NWD760327.recab.cram
 75 | NWD761329	crams/NWD761329.recab.cram
 76 | NWD762682	crams/NWD762682.recab.cram
 77 | NWD763972	crams/NWD763972.recab.cram
 78 | NWD768309	crams/NWD768309.recab.cram
 79 | NWD768493	crams/NWD768493.recab.cram
 80 | NWD769626	crams/NWD769626.recab.cram
 81 | NWD778759	crams/NWD778759.recab.cram
 82 | NWD790783	crams/NWD790783.recab.cram
 83 | NWD791319	crams/NWD791319.recab.cram
 84 | NWD805667	crams/NWD805667.recab.cram
 85 | NWD812009	crams/NWD812009.recab.cram
 86 | NWD821054	crams/NWD821054.recab.cram
 87 | NWD831422	crams/NWD831422.recab.cram
 88 | NWD832275	crams/NWD832275.recab.cram
 89 | NWD841343	crams/NWD841343.recab.cram
 90 | NWD842401	crams/NWD842401.recab.cram
 91 | NWD855893	crams/NWD855893.recab.cram
 92 | NWD866959	crams/NWD866959.recab.cram
 93 | NWD875673	crams/NWD875673.recab.cram
 94 | NWD881320	crams/NWD881320.recab.cram
 95 | NWD886731	crams/NWD886731.recab.cram
 96 | NWD897509	crams/NWD897509.recab.cram
 97 | NWD901849	crams/NWD901849.recab.cram
 98 | NWD905240	crams/NWD905240.recab.cram
 99 | NWD909656	crams/NWD909656.recab.cram
100 | NWD910953	crams/NWD910953.recab.cram
101 | NWD918554	crams/NWD918554.recab.cram
102 | NWD929194	crams/NWD929194.recab.cram
103 | NWD952432	crams/NWD952432.recab.cram
104 | NWD953198	crams/NWD953198.recab.cram
105 | NWD967078	crams/NWD967078.recab.cram
106 | NWD968809	crams/NWD968809.recab.cram
107 | NWD991001	crams/NWD991001.recab.cram
108 | 


--------------------------------------------------------------------------------
/king/MathLu.cpp:
--------------------------------------------------------------------------------
  1 | #include "MathLu.h"
  2 | #include "Error.h"
  3 | 
  4 | #include <math.h>
  5 | 
  6 | LU::~LU()
  7 |    { }   // intentionally empty: this body releases nothing explicitly
  8 | 
  9 | void LU::Decompose(Matrix & a)
 10 |    {
 11 |    Vector vv;           // stores the implicit scaling of each row
 12 |    // Factors a copy of a into lu; row swaps are recorded in permutation and their parity in d
 13 |    if (a.rows != a.cols)
 14 |       error("LU.Decompose: Matrix %s is not square", (const char *) a.label);
 15 | 
 16 |    lu.Copy(a);
 17 |    vv.Dimension(lu.rows);
 18 |    d = 1.0;
 19 | 
 20 |    permutation.Dimension(lu.rows);
 21 | 
 22 |    // loop over rows to get implicit scaling information
 23 |    for (int i = 0; i < lu.rows; i++)
 24 |       {
 25 |       double big = 0.0, temp;
 26 |       for (int j = 0; j < lu.rows; j++)
 27 |          if ( (temp = fabs(lu[i][j])) > big) big = temp;
 28 |       if (big == 0.0)
 29 |          error ("LU.Decompose: Matrix %s is singular", (const char *) a.label);
 30 |       vv[i] = 1.0 / big;
 31 |       }
 32 | 
 33 |    // Loop over columns as per Crout's method
 34 |    for (int j=0; j < lu.rows; j++)
 35 |       {
 36 |       // Uij = aij - Sum(1 to i - 1)[Lik*Uik]
 37 |       for (int i=0; i < j; i++)
 38 |          {
 39 |          double sum = lu[i][j];
 40 |          for (int k=0; k < i; k++)
 41 |             sum -= lu[i][k] * lu[k][j];
 42 |          lu[i][j] = sum;
 43 |          }
 44 | 
 45 |       // find the pivot element
 46 |       double big = 0.0;
 47 |       int    imax = j;   // start at the diagonal so imax is defined even if no merit compares >= big (NaN)
 48 | 
 49 |       // and compute Lij = 1/Ujj * { aij - Sum(1 to j - 1)[Lik*Uik] }
 50 |       for (int i = j; i < lu.rows; i++)
 51 |          {
 52 |          double sum = lu[i][j];
 53 |          for (int k = 0; k < j; k++)
 54 |             sum -= lu[i][k] * lu[k][j];
 55 |          lu[i][j] = sum;
 56 | 
 57 |          // check the figure of merit for this pivot
 58 |          double merit = vv[i] * fabs(sum);
 59 |          if (merit >= big)
 60 |             {
 61 |             big = merit;
 62 |             imax = i;
 63 |             }
 64 |          }
 65 | 
 66 |       // interchange rows if necessary
 67 |       if (j != imax)
 68 |          {
 69 |          lu.SwapRows(j, imax);
 70 |          d = -d;             // each interchange flips the sign used by Determinant()
 71 |          vv[imax] = vv[j];   // the scale factor follows the row that moved down to imax
 72 |          }
 73 | 
 74 |       permutation[j] = imax;
 75 | 
 76 |       if (lu[j][j] == 0.0)
 77 |           error("LU.Decompose: Matrix %s has zero pivot",(const char *)a.label);
 78 | 
 79 |       // finally divide by pivot element
 80 |       if (j != lu.rows - 1)
 81 |          {
 82 |          double scale = 1.0 / lu[j][j];
 83 |          for (int i = j + 1; i < lu.rows; i++)
 84 |             lu[i][j] *= scale;
 85 |          }
 86 |       }
 87 |    }
 88 | 
 89 | void LU::BackSubst(Vector & b)
 90 |    {
 91 |    x.Copy(b);
 92 |    // All work below is done on the member x (a copy of b) using the factors held in lu
 93 |    // take into account the possibility that b starts with
 94 |    // a number of leading zeros (ie. for matrix inversion)
 95 | 
 96 |    int nonZero = -1, unscramble;   // nonZero stays -1 until the first non-zero entry has been seen
 97 | 
 98 |    // forward substitution with unscrambling of the permutation...
 99 |    for (int i = 0; i < lu.rows; i++)
100 |       {
101 |       unscramble = permutation[i];
102 |       double sum = x[unscramble];
103 |       x[unscramble] = x[i];
104 | 
105 |       if (nonZero != -1)
106 |          for (int j = nonZero; j <= i - 1; j++)
107 |             sum -= lu[i][j] * x[j];
108 |       else
109 |          if (sum)
110 |             nonZero = i;
111 |       x[i] = sum;
112 |       }
113 | 
114 |    // Now do the backsubstitution
115 |    for (int i = lu.rows - 1; i >= 0; i--)
116 |       {
117 |       double sum = x[i];
118 |       for (int j = i + 1; j < lu.rows; j++)
119 |          sum -= lu[i][j] * x[j];
120 |       x[i] = sum / lu[i][i];
121 |       }
122 |    }
123 | 
124 | void LU::Invert()
125 |    {
126 |    // Seed inv with an identity matrix of lu's order, then overwrite each row with the x that BackSubst produces for it
127 |    inv.Dimension(lu.rows, lu.rows);
128 |    inv.Identity();
129 |    int row = 0;
130 |    while (row < lu.rows)
131 |       {
132 |       BackSubst(inv[row]);
133 |       inv[row++] = x;
134 |       }
135 |    }
136 | 
137 | double LU::Determinant()
138 |    {
139 |    // d is the +1/-1 sign that Decompose flips on every row interchange
140 |    double product = d;
141 |    int row = 0;
142 |    while (row < lu.rows) { product *= lu[row][row]; ++row; }
143 | 
144 |    return product;
145 |    }
146 | 
147 | double LU::lnDeterminant()
148 |    {   // Returns the sum of log|lu[i][i]|; calls error() when the determinant's overall sign is negative
149 |    bool minus_sign = d == -1;
150 |    double    lnDet = 0.0;
151 |    // every non-positive diagonal entry is logged by magnitude and toggles the running sign
152 |    for (int i = 0; i < lu.rows; i++)
153 |       if (lu[i][i] > 0)
154 |          lnDet += log(lu[i][i]);
155 |       else
156 |          {
157 |          lnDet += log(-lu[i][i]);
158 |          minus_sign = !minus_sign;
159 |          }
160 |    // an odd number of sign flips means the determinant itself is negative and has no real logarithm
161 |    if (minus_sign)
162 |       error("LU::lnDeterminant cannot log negative value\n");
163 | 
164 |    return lnDet;
165 |    }
166 | 
167 | 


--------------------------------------------------------------------------------
/king/MathFloatVector.h:
--------------------------------------------------------------------------------
  1 | #ifndef __MATHFLOATVECTOR_H__
  2 | #define __MATHFLOATVECTOR_H__
  3 | 
  4 | #include "StringBasics.h"
  5 | 
  6 | #include <stdio.h>
  7 | #include <assert.h>
  8 | 
  9 | class Matrix;
 10 | 
 11 | class FloatVector   // Labelled vector of float; members declared without a body here are defined outside this header
 12 |    {
 13 |    public:
 14 |       int         dim, size;   // dim is the length reported by Length(); size is presumably the allocated capacity (confirm in the .cpp)
 15 |       float *    data;
 16 |       String      label;
 17 |    // Every constructor calls Init() first and then applies its arguments
 18 |    FloatVector()
 19 |       { Init(); }
 20 |    FloatVector(FloatVector & v)
 21 |       { Init(); Copy(v); }
 22 |    FloatVector(int d)
 23 |       { Init(); Dimension(d); }
 24 |    FloatVector(const char * text)
 25 |       { Init(); label = text; }
 26 |    FloatVector(const char * text, int d)
 27 |       { Init(); label = text; Dimension(d); }
 28 |    FloatVector(const char * text, FloatVector & v)
 29 |       { Init(); label = text; Copy(v); }
 30 | 
 31 |    ~FloatVector();
 32 | 
 33 |    void   Dimension(int d);
 34 |    int    Length() const { return dim; }
 35 | 
 36 |    void   SetLabel(const char * text) { label = text; }
 37 | 
 38 |    void   Zero();
 39 |    void   Set(double k);
 40 |    void   Set(FloatVector & v) { Copy(v); };
 41 |    void   SetMultiple(double k, FloatVector & v);
 42 | 
 43 |    void   Negate();
 44 |    void   Add(double n);
 45 |    void   Multiply(double k);
 46 | 
 47 |    double InnerProduct(FloatVector & v);
 48 |    void   Copy(const FloatVector & v);
 49 |    void   Add(FloatVector & v);
 50 |    void   AddMultiple(double k, FloatVector & v);
 51 |    void   Subtract(FloatVector & v);
 52 | 
 53 |    void Product(Matrix & m, FloatVector & v);
 54 |    // Indexed access; the assert only guards the upper bound (n < dim)
 55 |    float & operator [] (int n)
 56 |       { assert(n < dim);  return data[n]; }
 57 |    float operator [] (int n) const
 58 |       { assert(n < dim);  return data[n]; }
 59 |    // Fractional access: element (int) (dim * fraction); reference from the non-const overload, value from the const one
 60 |    float & operator [] (double fraction)
 61 |       { return data[(int) (dim * fraction)]; }
 62 |    float operator [] (double fraction) const
 63 |       { return data[(int) (dim * fraction)]; }
 64 | 
 65 |    FloatVector & operator = (const FloatVector & v);
 66 |    bool operator == (const FloatVector & v) const;
 67 |    bool operator != (const FloatVector & v) const { return !(*this == v); }
 68 |    // Exchanges two elements; the temporary has the element type so no widening round trip is involved
 69 |    void Swap(int i, int j)
 70 |       { float swap = data[i]; data[i] = data[j]; data[j] = swap; }
 71 |    void Swap(FloatVector & rhs);
 72 |    // Scalar compound assignment: -= is += of the negated value, /= is *= of the reciprocal
 73 |    FloatVector & operator *= (double rhs) { Multiply(rhs); return *this; }
 74 |    FloatVector & operator += (double rhs) { Add(rhs); return *this; }
 75 |    FloatVector & operator -= (double rhs) { return *this += -rhs; }
 76 |    FloatVector & operator /= (double rhs) { return *this *= 1/rhs; }
 77 | 
 78 |    void DeleteDimension (int n);
 79 |    void Delete(int n) { DeleteDimension(n); }
 80 |    void Insert(int n, double value);
 81 | 
 82 |    // Calculates average and variance
 83 |    void   AveVar(double & ave, double & var) const;
 84 |    double Average() const;
 85 |    double Var() const;
 86 | 
 87 |    // Common descriptive functions
 88 |    double Sum() const;
 89 |    double SumSquares() const;
 90 |    double Product() const;
 91 | 
 92 |    // Find extreme values
 93 |    double Min() const;
 94 |    double Max() const;
 95 | 
 96 |    // Return the number of elements in a subset
 97 |    int  CountIfGreater(double treshold) const;
 98 |    int  CountIfGreaterOrEqual(double treshold) const;
 99 | 
100 |    // Append another vector to the end
101 |    void Stack(const FloatVector & v);
102 |    // Print without a FILE * argument writes to stdout
103 |    void Print(int maxDim = -1) { Print(stdout, maxDim); }
104 |    void Print(FILE * output, int maxDim = -1);
105 | 
106 |    // Routines for creating and searching through sorted vectors
107 |    void Sort();
108 |    void Reverse();
109 |    void Sort(FloatVector & freeRider);
110 |    int  BinarySearch(double element);
111 | 
112 |    // Remove consecutive duplicate elements from FloatVector
113 |    void RemoveDuplicates();
114 | 
115 |    // Query first and last elements
116 |    // (neither accessor checks for an empty vector)
117 | 
118 |    float & First() { return data[0]; }
119 |    float & Last()  { return data[dim - 1]; }
120 | 
121 |    // Routines for using a vector as a stack of doubles
122 |    // (Clear only resets dim; Pop and Peek do not check for an empty vector)
123 | 
124 |    void   Clear()      { dim = 0; }
125 |    void   Push(double value);
126 |    double Pop()        { return data[--dim]; }
127 |    double Peek() const { return data[dim-1]; }
128 | 
129 |    // This routine adds items to a sorted list
130 |    //
131 | 
132 |    void   InsertInSortedList(int item);
133 | 
134 |    bool   isAscending();
135 |    bool   isDescending();
136 | 
137 |    // Routines for dealing with vectors that include missing data
138 |    //
139 | 
140 |    int SafeCount() const;
141 |    double SafeMin() const;
142 |    double SafeMax() const;
143 | 
144 |    private:
145 |       static int CompareFloat(const float * a, const float * b);
146 |       void Init();
147 |    };
148 | 
149 | #endif
150 | 
151 | 
152 | 
153 | 


--------------------------------------------------------------------------------
/king/PedigreeAlleles.h:
--------------------------------------------------------------------------------
  1 | ////////////////////////////////////////////////////////////////////// 
  2 | // libsrc/PedigreeAlleles.h 
  3 | // (c) 2000-2007 Goncalo Abecasis
  4 | // 
  5 | // This file is distributed as part of the MERLIN source code package   
  6 | // and may not be redistributed in any form, without prior written    
  7 | // permission from the author. Permission is granted for you to       
  8 | // modify this file for your own personal use, but modified versions  
  9 | // must retain this copyright notice and must not be distributed.     
 10 | // 
 11 | // Permission is granted for you to use this file to compile MERLIN.    
 12 | // 
 13 | // All computer programs have bugs. Use this file at your own risk.   
 14 | // 
 15 | // Tuesday December 18, 2007
 16 | // 
 17 |  
 18 | #ifndef __PEDALLELES_H__
 19 | #define __PEDALLELES_H__
 20 | 
 21 | #include "LongInt.h"
 22 | 
 23 | class Alleles{   // One genotype packed into a char: allele 1 in the low 4 bits (geno&15), allele 2 in the bits above (geno>>4)
 24 |    public:
 25 |       char geno;
 26 |       Alleles(){ geno = 0; }
 27 |       // NOTE(review): where plain char is signed, a second allele of 8 or more makes geno negative and geno>>4 may not return it; confirm the allele range
 28 |       char operator [] (int i)
 29 |       { return (i == 1) ? (geno&15) : (geno>>4);}
 30 |       // Packs G1 into the low nibble and G2 above it
 31 |       void AssignGenotype(int G1, int G2)
 32 |       {geno = char(G1 + (G2<<4));}
 33 |       // Stores an already packed genotype code as is
 34 |       void AssignGenotype(int G)
 35 |       {geno = char(G);}
 36 | 
 37 |       // is the genotype fully defined?
 38 |       bool isKnown(){ return geno != 0; }
 39 |       bool isHeterozygous()
 40 |          { return isKnown() && ((geno&15) != (geno>>4)); }
 41 |       bool isHomozygous()
 42 |          { return isKnown() && ((geno&15) == (geno>>4)); }
 43 |       bool hasAllele(int a)
 44 |          { return ((geno&15) == a) || ((geno>>4) == a); }
 45 | 
 46 |       // in a bi-allelic system (a, NOT a)
 47 |       bool isHeterozygousFor(int a){ return isHeterozygous() && hasAllele(a); }
 48 |       bool isHomozygousFor(int a){ return !(isHeterozygousFor(a)); }
 49 | 
 50 |       // how may alleles a in this genotype?
 51 |       int countAlleles(int a)
 52 |          { return (((geno&15) == a) ? 1 : 0) + (((geno>>4) == a) ? 1 : 0); }
 53 | 
 54 |       // what is the other allele, assuming genotype is (a, X)
 55 |       int otherAllele(int a)
 56 |          { return (((geno&15) == a) ? (geno>>4) : (geno&15)); }
 57 |       // the second comparison re-packs al with its two alleles swapped, so (1,2) matches (2,1)
 58 |       // are two unordered genotypes identical?
 59 |       int identicalTo(Alleles & al)
 60 |          { return (al.geno == geno) ||
 61 |                   ((al[2] + (al[1]<<4)) == geno);}
 62 | 
 63 |       // how many alleles are identical by state
 64 |       int countIBS(Alleles & al)
 65 |          { return  ((geno&15) == al[1]) ?
 66 |                     (((geno>>4) == al[2]) ? 2 : 1) :
 67 |                   (  ((geno&15) == al[2]) ?
 68 |                     (((geno>>4) == al[1]) ? 2 : 1) :
 69 |                    ((((geno>>4) == al[1]) || ((geno>>4) == al[2])) ? 1 : 0));
 70 |          }
 71 |       // equality operators use the unordered comparison above
 72 |       int operator == (Alleles & rhs) { return identicalTo(rhs); }
 73 |       int operator != (Alleles & rhs) { return !identicalTo(rhs); }
 74 |       // larger and smaller of the two packed allele codes
 75 |       char Hi()
 76 |          { return (geno&15) > (geno>>4) ? (geno&15) : (geno>>4); }
 77 |       char Lo()
 78 |          { return (geno&15) > (geno>>4) ? (geno>>4) : (geno&15); }
 79 |       // single integer code Hi*(Hi-1)/2 + Lo for the unordered pair; 0 when unknown
 80 |       int SequenceCoded()
 81 |          { return isKnown() ? Hi() * (Hi() - 1) / 2 + Lo() : 0; }
 82 |       // bit mask with bit (allele - 1) set for each allele; NOTZERO when the genotype is unknown
 83 |       longint BinaryCoded()
 84 |          {
 85 |          if (isKnown())
 86 |             {
 87 |             longint allele1(1);
 88 |             longint allele2(1);
 89 | 
 90 |             allele1 <<= (geno&15) - 1;
 91 |             allele2 <<= (geno>>4) - 1;
 92 | 
 93 |             return allele1 | allele2;
 94 |             }
 95 |          else
 96 |             return NOTZERO;
 97 |          }
 98 |       // unchanged when both genotypes match; otherwise homozygous for a shared allele, or wiped to 0 when none is shared
 99 |       void Intersect(Alleles & gen)
100 |          {
101 |          char a1 = Lo(), a2 = Hi();
102 |          char b1 = gen.Lo(), b2 = gen.Hi();
103 | 
104 |          if (a1 == b1 && a2 == b2)
105 |             return;
106 |          if (a1 == b1 || a1 == b2)
107 |             geno = (a1<<4) + a1;
108 |          else if (a2 == b1 || a2 == b2)
109 |             geno = (a2<<4) + a2;
110 |          else
111 |             geno = 0;
112 |          }
113 |       // homozygous for allele when the genotype carries it, wiped to 0 otherwise
114 |       void Intersect(char allele)
115 |          {
116 |          if ((geno&15) != allele && (geno>>4) != allele)
117 |             geno = 0;
118 |          else
119 |             geno = (allele << 4) + allele;
120 |          }
121 |       // stores allele in the first empty slot; false only when both slots already hold other alleles
122 |       bool AddAllele(char allele)
123 |          {
124 |          if ((geno&15) == allele || (geno>>4) == allele)
125 |             return true;
126 | 
127 |          if ((geno&15) != 0 && (geno>>4) != 0)
128 |             return false;
129 |          // the second slot lives above the low nibble, so the allele is shifted left into place
130 |          if ((geno&15) == 0) geno |= allele; else geno |= (allele<<4);
131 |          return true;
132 |          }
133 | 
134 |       void Wipe() {geno=0;}
135 |    };
136 | 
137 | #endif
138 | 
139 | 
140 | 


--------------------------------------------------------------------------------
/scripts/e04-filter-vars.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | # Usage: e04-filter-vars.pl <svm.sites.vcf.gz> <milk.sites.vcf.gz> <chr> <outprefix>
 3 | use strict;
 4 | # Re-filters the SVM site list using duplicate/trio discordance counts read from the MILK records, line by line
 5 | my $chr = $ARGV[2];
 6 | 
 7 | my $milk = $ARGV[1]; #"fixed0/milk.filt/milk.chr$chr.merged.sites.vcf.gz";
 8 | my $svm  = $ARGV[0]; #"analysis/filt/ld/frz9/svm.hm3ld.fmis10/frz9.milk_svm.hm3ld_fmis10.chr$chr.sites.vcf.gz";
 9 | my $outprefix  = $ARGV[3]; #"fixed0/svm.ld.fmis/frz9.milk_svm.release.chr$chr.sites";
10 | my $vcfsummary2 = "$ENV{'EXE_PREFIX'}/apigenome/bin/vcf-summary-v2";
11 | my $ref = "resources/ref/hs38DH.fa";
12 | my $dbsnp = "resources/ref/dbsnp_142.b38.vcf.gz";
13 | my @posVcfs = qw(resources/ref/hapmap_3.3.b38.sites.vcf.gz resources/ref/1000G_omni2.5.b38.sites.PASS.vcf.gz);
14 | # Inputs are read through zcat; OUT1 is the bgzipped VCF and OUT2 feeds the same records to vcf-summary-v2
15 | open(SVM,"zcat $svm|") || die "Cannot open file\n";
16 | open(MILK,"zcat $milk | grep -v ^# |") || die "Cannot open file\n";
17 | open(OUT1," | $ENV{'EXE_PREFIX'}/htslib/bgzip -c > $outprefix.vcf.gz") || die "Cannot open file\n";
18 | open(OUT2, "| $vcfsummary2 --ref $ref --db $dbsnp --FNRvcf $posVcfs[0] --chr $chr --tabix $ENV{'EXE_PREFIX'}/htslib/tabix --bgzip $ENV{'EXE_PREFIX'}/htslib/bgzip > $outprefix.summary_v2") || die "Cannot open file\n";
19 | while(<SVM>) {
20 |     if ( /^#/ ) {
21 | 	next if ( /^##FILTER=<ID=DISC,/ );   # the DISC filter definition is dropped from the header
22 | 	print OUT1 $_;
23 |         #if ( /^##INFO=<ID=LOG_HM3_DIST_08/ ) {
24 | 	if ( /^##INFO=<ID=FMIS/ ) {
25 | 	    print OUT1 "##INFO=<ID=DUP_NH_ALL,Number=1,Type=Integer,Description=\"Number of duplicate pairs where both genotype depths are 10+ and not identical homozygotes\">\n";
26 | 	    print OUT1 "##INFO=<ID=DUP_NH_DIS,Number=1,Type=Integer,Description=\"Number of discordant duplicate genotype pairs where both depths are 10+ and not identical homozygotes\">\n";
27 | 	    print OUT1 "##INFO=<ID=TRI_NH_ALL,Number=1,Type=Integer,Description=\"Number of trios where all genotype depths are 10+, mendelian-informative, and not identical homozygotes\">\n";
28 | 	    print OUT1 "##INFO=<ID=TRI_NH_DIS,Number=1,Type=Integer,Description=\"Number of mendelian-discordant trios where all genotype depths are 10+, mendelian-informative, and not identical homozygotes\">\n";
29 | 	    print OUT1 "##FILTER=<ID=DUP2,Description=\"Duplicate genotype discordance is greater than 2%, with at least two discordances\">\n";
30 | 	    print OUT1 "##FILTER=<ID=TRI2,Description=\"Mendelian genotype discordance is greater than 2%, with at least two discordances\">\n";
31 | 	    print OUT1 "##FILTER=<ID=MIS2,Description=\"Genotype missing rate at depth 10 is greater than 2%\">\n";
32 | 	}
33 |     }
34 |     else {
35 | 	my @F = split(/[\t\r\n]/);
36 | 	my @M = split(/[\t\r\n]/,<MILK>);
37 | 	die "SVM and MILK records are out of sync at $F[0]:$F[1]\n" unless ( $F[1] == $M[1] );
38 | 	# The two inputs are consumed in lockstep, one MILK data line per SVM data line
39 | 	#print STDERR "Processing $F[0]:$F[1]\n" if ( $. % 1000000 == 0 );
40 | 	# List-context matches: unlike "my ... if", the variables are freshly undef whenever a pattern fails
41 | 	my ($dup,$trio) = ( $M[7] =~ /;DUP_CONC_THRES=([^;]+);.*;TRIO_CONC_THRES=([^;]+)/ );
42 | 	my ($fmis) = ( $F[7] =~ /;FMIS10=([^;]+)/ );
43 | 	my @dups = split(/,/,$dup);
44 | 	my @trios = split(/,/,$trio);
45 | 
46 | 	my $duphet  = $dups[4];
47 | 	my $dupdisc = $dups[1]+$dups[2]+$dups[3]+$dups[5]+$dups[6]+$dups[7];
48 | 	my $dupnhom = $duphet + $dupdisc;
49 | 	my $dupnref  = $dupnhom + $dups[8];
50 | 	my $dupall   = $dupnref + $dups[0];
51 | 	my $dupFilt = ( ($dupdisc > 1 ) && ( $dupdisc > 0.02*$dupnhom) ) ? 1 : 0;
52 | 
53 | 	## 0,1,2    : 0,0,0 C  0,0,1 D  0,0,2 D
54 | 	## 3,4,5    : 0,1,0 A  0,1,1 A  0,1,2 D
55 | 	## 6,7,8    : 0,2,0 D  0,2,1 C  0,2,2 D
56 | 	## 9,10,11  : 1,0,0 A  1,0,1 A  1,0,2 D
57 | 	## 12,13,14 : 1,1,0 A  1,1,1 A  1,1,2 A
58 | 	## 15,16,17 : 1,2,0 D  1,2,1 A  1,2,2 A
59 | 	## 18,19,20 : 2,0,0 D  2,0,1 C  2,0,2 D
60 | 	## 21,22,23 : 2,1,0 D  2,1,1 A  2,1,2 A
61 | 	## 24,25,26 : 2,2,0 D  2,2,1 D  2,2,2 C 
62 | 	my @idxD = (1,2,5,6,8,11,15,18,20,21,24,25);
63 | 	my @idxC = (0,7,19,26);
64 | 	my ($trioconc,$triodisc) = (0,0);
65 | 	foreach my $i (@idxD) { $triodisc += $trios[$i]; }
66 | 	foreach my $i (@idxC) { $trioconc += $trios[$i]; }
67 | 	# Only the D and C cells of the table above are summed; the A cells are left out
68 | 	my $trioall = $trioconc + $triodisc;
69 | 	my $trionref = $trioall - $trios[0];
70 | 	my $trionhom = $trionref - $trios[26];
71 | 	my $triFilt = ( ($triodisc > 1 ) && ( $triodisc > 0.02*$trionhom) ) ? 1 : 0;
72 | 	my $misFilt = ( $fmis > 0.02 ) ? 1 : 0;
73 | 	# Rebuild FILTER: drop PASS and DISC, append the new flags, fall back to PASS when nothing remains
74 | 	my @filts = split(/;/,$F[6]);
75 | 	my @newfilts = ();
76 | 	foreach my $f (@filts) {
77 | 	    push(@newfilts,$f) if ( ( $f ne "PASS" ) && ( $f ne "DISC" ) );
78 | 	}
79 | 	push(@newfilts,"DUP2") if ( $dupFilt == 1 );
80 | 	push(@newfilts,"TRI2") if ( $triFilt == 1 );
81 | 	push(@newfilts,"MIS2") if ( $misFilt == 1 );
82 | 	push(@newfilts,"PASS") if ( $#newfilts < 0 );
83 | 
84 | 	$F[7] =~ s/;SVM=/;DUP_NH_ALL=$dupnhom;DUP_NH_DIS=$dupdisc;TRI_NH_ALL=$trionhom;TRI_NH_DIS=$triodisc;SVM=/;
85 | 	print OUT1 join("\t",@F[0..5],join(";",@newfilts),$F[7])."\n";
86 | 	print OUT2 join("\t",@F[0..5],join(";",@newfilts),$F[7])."\n";
87 |     }
88 | }
89 | close OUT1;
90 | close OUT2;
91 | close MILK;
92 | close SVM;
93 | # Index the bgzipped VCF once every pipe has been closed
94 | print `$ENV{'EXE_PREFIX'}/htslib/tabix -f -pvcf $outprefix.vcf.gz`;
95 | 


--------------------------------------------------------------------------------
/king/OptimizerConstraints.cpp:
--------------------------------------------------------------------------------
  1 | #include "OptimizerConstraints.h"
  2 | 
  3 | #include <math.h>
  4 | 
// Per-parameter constraint codes stored in the `constraints` array.
// The values are bit flags: CONSTRAIN_MIN | CONSTRAIN_MAX == CONSTRAIN_RANGE,
// which is what lets SetMin() and SetMax() be combined through `|=`.
#define   CONSTRAIN_NONE    0
#define   CONSTRAIN_MIN     1
#define   CONSTRAIN_MAX     2
#define   CONSTRAIN_RANGE   3
  9 | 
 10 | void OptimizerInterface::Dimension(int parameters)
 11 |    {
 12 |    point.Dimension(parameters);
 13 |    min.Dimension(parameters);
 14 |    max.Dimension(parameters);
 15 | 
 16 |    constraints.Dimension(parameters);
 17 |    constraints.Zero();
 18 |    }
 19 | 
 20 | void OptimizerInterface::SetMin(int parameter, double value)
 21 |    {
 22 |    constraints[parameter] |= CONSTRAIN_MIN;
 23 |    min[parameter] = value;
 24 |    }
 25 | 
 26 | void OptimizerInterface::SetMax(int parameter, double value)
 27 |    {
 28 |    constraints[parameter] |= CONSTRAIN_MAX;
 29 |    max[parameter] = value;
 30 |    }
 31 | 
 32 | void OptimizerInterface::SetRange(int parameter, double MIN, double MAX)
 33 |    {
 34 |    constraints[parameter] = CONSTRAIN_RANGE;
 35 |    min[parameter] = MIN;
 36 |    max[parameter] = MAX;
 37 |    }
 38 | 
 39 | void OptimizerInterface::Fix(int parameter, double value)
 40 |    {
 41 |    constraints[parameter] = CONSTRAIN_RANGE;
 42 |    min[parameter] = max[parameter] = value;
 43 |    }
 44 | 
// Removes any constraint on a single parameter by resetting its code to
// CONSTRAIN_NONE. The stored min/max values are left untouched.
void OptimizerInterface::ClearConstraints(int parameter)
   {
   constraints[parameter] = CONSTRAIN_NONE;
   }
 49 | 
// Removes the constraints on every parameter at once by zeroing the whole
// constraint-code array (CONSTRAIN_NONE is 0).
void OptimizerInterface::ClearConstraints()
   {
   constraints.Zero();
   }
 54 | 
// Registers the objective function that Evaluate() forwards to.
// Only the address is stored, so `function` must outlive this object's
// use of it.
void OptimizerInterface::SetObjectiveFunction(ObjectiveFunction & function)
   {
   f = &function;
   }
 59 | 
 60 | double OptimizerInterface::Evaluate(Vector & vector)
 61 |    {
 62 |    Translate(vector, point);
 63 | 
 64 |    return f->Evaluate(point);
 65 |    }
 66 | 
 67 | void OptimizerInterface::Translate(Vector & unconstrained, Vector & constrained)
 68 |    {
 69 |    constrained.Dimension(constraints.Length());
 70 | 
 71 |    for (int i = 0, j = 0; i < constraints.Length(); i++)
 72 |       switch (constraints[i])
 73 |          {
 74 |          case CONSTRAIN_NONE :
 75 |             constrained[i] = unconstrained[j++];
 76 |             break;
 77 |          case CONSTRAIN_MIN :
 78 |             constrained[i] = min[i] + exp(unconstrained[j++]);
 79 |             break;
 80 |          case CONSTRAIN_MAX :
 81 |             constrained[i] = max[i] - exp(unconstrained[j++]);
 82 |             break;
 83 |          case CONSTRAIN_RANGE :
 84 |             if (min[i] == max[i])
 85 |                constrained[i] = min[i];
 86 |             else
 87 |                {
 88 |                double x = unconstrained[j++];
 89 | 
 90 |                if (x >= 36)
 91 |                   constrained[i] = max[i];
 92 |                else
 93 |                   constrained[i] = min[i] + (max[i] - min[i]) * exp(x) / (1 + exp(x));
 94 |                }
 95 |          }
 96 |    }
 97 | 
 98 | void OptimizerInterface::BackTranslate(Vector & constrained, Vector & unconstrained)
 99 |    {
100 |    unconstrained.Dimension(constraints.Length());
101 | 
102 |    int j = 0;
103 |    for (int i = 0; i < constraints.Length(); i++)
104 |       switch (constraints[i])
105 |          {
106 |          case CONSTRAIN_NONE :
107 |             unconstrained[j++] = constrained[i];
108 |             break;
109 |          case CONSTRAIN_MIN :
110 |             assert(constrained[i] >= min[i]);
111 |             unconstrained[j++] = log(constrained[i] + min[i] + 1e-16);
112 |             break;
113 |          case CONSTRAIN_MAX :
114 |             assert(constrained[i] <= max[i]);
115 |             unconstrained[j++] = exp(max[i] - constrained[i] + 1e-16);
116 |             break;
117 |          case CONSTRAIN_RANGE :
118 |             if (min[i] == max[i])
119 |                assert(constrained[i] == min[i]);
120 |             else
121 |                {
122 |                assert(constrained[i] >= min[i]);
123 |                assert(constrained[i] <= max[i]);
124 | 
125 |                double x = (constrained[i] - min[i]) / (max[i] - min[i]);
126 | 
127 |                if (x >= 0.999999999) x = 0.999999999;
128 |                if (x <= 1e-16)       x = 1e-16;
129 | 
130 |                unconstrained[j++] = log(x/(1-x));
131 |                }
132 |          }
133 | 
134 |    unconstrained.Dimension(j);
135 |    }
136 | 
// Returns the total number of parameters, fixed ones included
// (the length of the constraint-code array).
int OptimizerInterface::CountParameters()
   {
   return constraints.Length();
   }
141 | 
142 | int OptimizerInterface::CountFreeParameters()
143 |    {
144 |    int parameters = constraints.Length();
145 | 
146 |    for (int i = 0; i < constraints.Length(); i++)
147 |       if (constraints[i] == CONSTRAIN_RANGE && min[i] == max[i])
148 |          parameters--;
149 | 
150 |    return parameters;
151 |    }
152 | 


--------------------------------------------------------------------------------
/king/BrentC.cpp:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <stdlib.h>
  3 | #include <stdio.h>
  4 | #include "analysis.h"
  5 | 
  6 | /// <summary>Brent's method for minimizing a 1d function</summary>
  7 | // Machine eps
  8 | double Engine::MACHEPS = (double)2.2204460492503131e-016;
  9 | double Engine::MACHEPS_SQRT = sqrt(MACHEPS);
 10 | double Engine::cbrent = ((double)3.0 - sqrt((double)5.0)) / (double)2.0;
 11 |    
 12 | /// <summary>Minimize the function over the interval [a, b]</summary>
 13 | /// <param name="f">Function to minimize</param>
 14 | /// <param name="a">Left side of the bracket</param>
 15 | /// <param name="b">Right side of the bracket</param>
 16 | /// <param name="eps">Stopping tolerance</param>
 17 | /// <param name="funcx">Function evaluated at the minimum</param>
 18 | /// <param name="numiter">number of function evaluations</param>
 19 | /// <param name="maxIter">maximum number of function evaluations allowed</param>
 20 | /// <param name="quiet">print out function evaluations?</param>
 21 | /// <returns>Point that minimizes the function</returns>
 22 | /// <remarks>This implements the algorithm from Brent's book, "Algorithms for Minimization without Derivatives"</remarks>
 23 | //double BrentC::minimize(BrentFunctor &f, double a, double b, double eps, double &funcx, size_t &numiter, size_t maxIter, bool quiet)
 24 | double Engine::minimize(double a, double b, double eps, double &funcx, int &numiter, int maxIter, bool quiet)
 25 | {
 26 |    if (a >= b){
 27 |       printf("Exception: a must be < b");
 28 |       throw(1);
 29 |    }
 30 | 
 31 |    double x = a + cbrent * (b - a);
 32 |    double v = x;
 33 |    double w = x;
 34 |    double e = 0;
 35 | 
 36 |    double fx = fLL(x);
 37 |    double fv = fx;
 38 |    double fw = fx;
 39 | 
 40 |    numiter = 0;
 41 | 
 42 |    while (true)
 43 |    {
 44 |       double m = (double)0.5 * (a + b);
 45 |       double tol = MACHEPS_SQRT * abs(x) + eps;
 46 |       double tol2 = (double)2 * tol;
 47 | 
 48 |       // Check the stopping criterion
 49 |       if (abs(x - m) <= tol2 - 0.5 * (b - a)){ break; }
 50 | 
 51 |       // Stop if we've exceeded the maximum number of iterations
 52 |       numiter++;
 53 |       if (numiter > maxIter){
 54 |          printf("Exception: Exceeded maximum number of iterations.");
 55 |          throw(2);
 56 |       }
 57 |       double p = 0.0, q = 0.0, r = 0.0;
 58 |       double d = 0.0;
 59 |       double u = 0.0;
 60 | 
 61 |       if (abs(e) > tol)
 62 |       {
 63 |          // Fit parabola
 64 |          r = (x - w) * (fx - fv);
 65 |          q = (x - v) * (fx - fw);
 66 |          p = (x - v) * q - (x - w)*r;
 67 |          q = (double)2.0 * (q - r);
 68 |          if (q > (double)0.0)
 69 |             p = -p;
 70 |          else
 71 |             q = -q;
 72 |          r = e;
 73 |          e = d;
 74 |       }
 75 | 
 76 |       if ((abs(p) < abs((double)0.5*q*r)) && (p < q*(a-x)) && (p < q*(b-x)))
 77 |       {
 78 |          // Parabolic interpolation step
 79 |          d = p / q;
 80 |          u = x + d;
 81 |          // f must not be evaluated too close to a or b
 82 |          if (u - a < tol2 || b - u < tol2)
 83 |             d = (x < m) ? tol : -tol;
 84 |       }
 85 |       else
 86 |       {
 87 |          // Golden section step
 88 |          e = (x < m) ? b - x : a - x;
 89 |          d = cbrent * e;
 90 |       }
 91 | 
 92 |       // f must not be evaluated too close to x
 93 |       if (abs(d) >= tol)
 94 |          u = x + d;
 95 |       else if (d > 0.0)
 96 |          u = x + tol;
 97 |       else
 98 |          u = x - tol;
 99 |       double fu = fLL(u);
100 | 
101 |       // Update
102 |       if (fu <= fx)
103 |       {
104 |          if (u < x)
105 |             b = x;
106 |          else
107 |             a = x;
108 |          v = w; fv = fw;
109 |          w = x; fw = fx;
110 |          x = u; fx = fu;
111 |       }
112 |       else
113 |       {
114 |          if (u < x)
115 |             a = u;
116 |          else
117 |             b = u;
118 | 
119 |          if (fu <= fw || w == x)
120 |          {
121 |             v = w; fv = fw;
122 |             w = u; fw = fu;
123 |          }
124 |          else if (fu <= fv || v == x || v == w)
125 |          {
126 |             v = u; fv = fu;
127 |          }
128 |       }
129 | 
130 |       if ( !quiet ){
131 |          printf("Iteration %d, min_x = %.4lf, f(min_x) = %.4lf\n", numiter, x, fx);
132 | 
133 |       /*
134 | #if defined( _MSC_VER )    // Windows/VC uses a %Iu specifier for size_t
135 |             const char *szFmt1 = "Iteration %Iu, min_x = %f, f(min_x) = %f, ";
136 | #else                      // Linux/g++ uses a %zu specifier for size_t
137 |             const char *szFmt1 = "Iteration %zu, min_x = %f, f(min_x) = %f, ";
138 | #endif
139 |          printf( szFmt1, numiter, x, fx );
140 |          */
141 |       }
142 |    }
143 |    funcx = fLL(x);
144 |    return x;
145 | }
146 | 
147 | 


--------------------------------------------------------------------------------
/king/MathGenMin.h:
--------------------------------------------------------------------------------
  1 | #ifndef __MATHPOWELL_H__
  2 | #define __MATHPOWELL_H__
  3 | 
  4 | #include "MathGold.h"
  5 | #include "MathVector.h"
  6 | #include "MathMatrix.h"
  7 | #include "Random.h"
  8 | 
// Multidimensional minimization of a continuous function
// starting with a user supplied starting point and
// direction vector
//
// Abstract base class: Minimize() is pure virtual, so one of the concrete
// minimizers derived from this class must be used.
//
class GeneralMinimizer
   {
   public:
      VectorFunc * func;               // Function to be minimized
      Matrix   directions;             // presumably one search direction per row/column -- confirm in the .cpp
      Vector   point;                  // presumably start point on entry, best point on exit -- confirm in the .cpp
      double   fmin;                   // presumably function value at `point` -- confirm in the .cpp

      // Setup matrices assuming ndim point
      virtual void   Reset(int ndim, double scale = 1.0);

      // Find a minimum using direction set and starting point
      // (TOL is not defined in this header; presumably it comes from MathGold.h)
      virtual double Minimize(double ftol = TOL) = 0;

      GeneralMinimizer();
      // Empty destructor: `func` is not released here
      virtual ~GeneralMinimizer() { }

      // Convenience wrapper: evaluates the objective function at v
      double f(Vector & v)
         { return func->Evaluate(v); }

      // Convenience wrapper: forwards to the objective function's Derivative()
      void df(Vector & v, Vector & d, double scale = 1.0)
         { func->Derivative(v, d, scale); }
   };
 36 | 
// Powell's conjugate direction method
// After each round, the direction of largest decrease replaces
// its biggest component among the original directions
//
// No constructor is declared for this class, so `iter` is not given an
// initial value by the class itself.
//

class PowellMinimizer : public GeneralMinimizer
   {
   public:
      int iter;      // presumably the iteration count of the last Minimize() call -- confirm in the .cpp

      virtual ~PowellMinimizer() { }
      virtual double Minimize(double ftol = TOL);
   };
 50 | 
 51 | 
// Simulated annealing using simplex method of Nelder and Mead
//
// Holds a raw pointer to a random number generator. NOTE(review): the
// `freeRand` flag presumably records whether this object owns `rand` and
// must release it in the destructor -- confirm in the .cpp. No copy
// constructor or assignment operator is declared, so copying an instance
// copies the pointer as is.
//
class SAMinimizer : public GeneralMinimizer
   {
   public:
      int      iter;
      bool     freeRand;
      Random * rand;

      Vector   y;                   // evaluation of entropy at y
      Matrix   simplex;             // volume in n dimensions (n+1) points

      SAMinimizer();
      SAMinimizer(Random & rand);

      virtual ~SAMinimizer();

      virtual void   Reset(int ndim, double scale = 1.0);

      // Lowers temperature T from maxT to minT in Tcycles linear decay cycles
      // Titer iterations at each temperature
      virtual double Minimize(double ftol = TOL);
      double MinimizeLoop(double ftol = TOL);

      double   T, maxT, minT;      // Temperature
      int      Tcycles, Titer;     // Cycling parameters

   private:
      // Work areas for the simplex moves
      Vector psum;
      Vector ptry;
      double yhi;

      // Shared initialisation helper (two public constructors are declared above)
      void   Constructor();
      double Amoeba(int ihi, double factor);
   };
 87 | 
// Multidimensional minimization of a continuous function by
// the down-hill simplex method of Nelder and Mead
// (Computer Journal 1965)
//
class AmoebaMinimizer : public GeneralMinimizer
   {
   public:
      Matrix      simplex;
      long        cycleCount, cycleMax;       // number of function evaluations

      AmoebaMinimizer();
      virtual ~AmoebaMinimizer() { }

      // Overrides of the GeneralMinimizer interface
      virtual void Reset(int dimensions, double scale = 1.0);
      virtual double Minimize(double ftol = TOL);

   private:
      // Work areas for the simplex moves
      Vector psum, ptry, y;

      double Amoeba(int ihi, double factor);
   };
109 | 
// Differential Evolution minimizer
// A stochastic minimizer based on the algorithm of Storn and Price, 1996

class EvolutionaryMinimizer : public GeneralMinimizer
   {
   public:
      Matrix   points;
      Vector   y;

      double crossover;       // This is the CR parameter of Storn and Price
      double step_size;       // This is the L parameter of Storn and Price
      int    multiples;       // The NP parameter of Storn and Price will be dimensions * multiple

      // Random number source; the empty destructor below does not release it
      Random * rand;

      bool   generate_random_points;

      int    generations;
      int    max_generations;

      EvolutionaryMinimizer();
      EvolutionaryMinimizer(Random & randomSeries);

      // Not spelled `virtual`, but virtual nonetheless because the base
      // class destructor is virtual
      ~EvolutionaryMinimizer() { }

      virtual void Reset(int dimensions, double scale = 1.0);
      virtual double Minimize(double ftol = TOL);

   private:
      // Shared initialisation helper (two public constructors are declared above)
      void Init(Random & randomSeries);
   };
141 | 
// Conjugate gradient minimizer
// Polak-Ribiere improvement on Fletcher-Reeves algorithm for
// multidimensional minimization.
//
// The constructor body is empty, so `iter` is not given an initial value
// on construction.
//

class FletcherMinimizer : public GeneralMinimizer
   {
   public:
      int iter;

     FletcherMinimizer() { }

      // Overrides of the GeneralMinimizer interface
      virtual void    Reset(int dimensions, double scale = 1.0);
      virtual double  Minimize(double ftol = TOL);

   private:
      // Work vectors; presumably the gradient and the conjugate direction -- confirm in the .cpp
      Vector g, h;
   };
160 | 
161 | #endif
162 | 
163 | 
164 | 


--------------------------------------------------------------------------------
/king/MathNormal.h:
--------------------------------------------------------------------------------
  1 | #ifndef __NORMALEQUATIONS_H__
  2 | #define __NORMALEQUATIONS_H__
  3 | 
  4 | #include "IntArray.h"
  5 | #include "MathMatrix.h"
  6 | #include "MathVector.h"
  7 | #include "MathCholesky.h"
  8 | 
// Codes naming a numerical minimizer.
// NOTE(review): presumably the values accepted by NormalSet::numericMinimizer -- confirm in the .cpp
#define NORMAL_AMOEBA_MIN    0
#define NORMAL_POWELL_MIN    1
#define NORMAL_FLETCHER_MIN  2
 12 | 
// One block of normal equations together with its likelihood.
// NOTE(review): the member names suggest a multivariate normal model whose
// mean comes from `linearModel` and whose covariance is assembled from
// `varComponents` -- confirm against the implementation file.
class NormalEquations
   {
   public:
      Vector   means, variances;

      // Array of variance component matrices, sized through Dimension()
      Matrix * varComponents;
      Matrix   linearModel;
      Vector   scores;

      double   likelihood;

      NormalEquations();
      virtual ~NormalEquations();

      void   Dimension(int vcCount);

      virtual void   Prepare();
      virtual void   SetParameters(Vector & means, Vector & variances);
      virtual double Evaluate();

      // Working storage, publicly accessible
      Cholesky  cholesky;
      Matrix    varMatrix;
      Vector    residuals;
      double    constant;
      bool      includeLikelihoodConstant;
      int       multiple;

      // Note: declared as a non-const member function
      bool operator == (const NormalEquations & rhs);

      // Presumably toggle includeLikelihoodConstant -- confirm in the .cpp
      void      EnableConstant();
      void      DisableConstant();

      // Diagnostic statistics
      // see JL Hopper and JD Matthews Ann Hum Genet (1992) 46:373 - 383
      double rawQ;      // This is a chi-square with n degrees of freedom
      double Q;         // This is Q1 and has a standard normal distribution
      Vector Qi;        // Each Qi is approximately chi-square with 1 df
      void   Diagnostics();

   protected:
      void Free();

      void CalculateResiduals();
      void CalculateCovariances();

      // Presumably dirty flags consulted before recalculating -- confirm in the .cpp
      bool      meanChange, varChange, init;
      IntArray  meanFlags;
   };
 61 | 
// A collection of NormalEquations objects that are evaluated and solved
// together. The destructor calls Free().
class NormalSet
   {
   public:
      // Array of pointers to the individual equation blocks
      NormalEquations ** sets;
      Vector    weights;
      // Presumably built with the NORMAL_OP / NORMAL_LAST_OP macros defined
      // at the end of this header -- confirm in the .cpp
      IntArray  operators;

      double precision;
      // Presumably one of the NORMAL_*_MIN codes -- confirm in the .cpp
      int    numericMinimizer;
      int    size;
      int    count;
      int    maxThreads;
      double likelihood;
      Vector variances, means;

      // Number of function evaluations
      int    evaluations;

      NormalSet(int threads = 0);

      virtual ~NormalSet() { Free(); }

      void        Dimension(int setCount, int vcCount, int vcDerived = 0);
      double      Evaluate();
      void        SelectPoint(Vector & v);
      void        Solve();
      int         CountObservations();
      virtual int CountParameters();

      // Direct access to block n; the index is not range checked here
      NormalEquations & operator [] (int n)
         { return *(sets[n]); }

      void EnableConstant();
      void DisableConstant();

      // Vector for storing intermediate likelihoods
      Vector recordedLikelihoods;

      // This function should be over-ridden to calculate constrained variance
      // components appropriately
      virtual void CalculateConstrainedVariances();

   protected:
      // for multi-threading
      static void * EvaluateOneSet(void * which);

      // house-keeping
      void  Free();
      virtual void AllocateSets();

      // Helpers for solver
      void         EditLinearDegenerates();
      virtual void GetStartingPoint(Vector & startPoint);
      void         RemoveRedundancy();

      // Intermediate results when calculating likelihoods
      Vector logLikelihoods;

      // How many variance components should be estimated?
      int  vcEstimated;

      // And how many variance components are constrained by the other parameters?
      int  vcConstrained;
   };
126 | 
// NormalSet variant that overrides how the constrained variance
// components are calculated.
// NOTE(review): the three index arrays presumably describe each derived
// variance as a combination of two other components -- confirm in the .cpp
class NonLinearNormalSet : public NormalSet
   {
   public:
      virtual void CalculateConstrainedVariances();

      IntArray  nonLinearVariances;
      IntArray  component1, component2;
   };
135 | 
// Adapter exposing a NormalSet through the VectorFunc interface, so that it
// can be handed to a minimizer as the objective function.
class NormalSolver : public VectorFunc
   {
   public:
      // Borrowed pointer: the empty destructor below does not release it
      NormalSet * normal;

      NormalSolver(NormalSet * n) : VectorFunc()
         { normal = n; }

      virtual ~NormalSolver() { }

      virtual double Evaluate(Vector & point);
   };
148 | 
// Constants for setting elements of operations array
// Which tell normal set class how to combine partial likelihoods
//
// Each opcode fits in three bits (NORMAL_OP_MASK is 7). NORMAL_OP packs
// four opcodes into one integer, `a` in the lowest three bits followed by
// `b`, `c` and `d`; NORMAL_LAST_OP places one further opcode at bit 12.
//

#define NORMAL_OP_MASK    7
#define NORMAL_NOP        0
#define NORMAL_MUL_LK     1
#define NORMAL_SCALE_LLK  2
#define NORMAL_SUM_LK     3
#define NORMAL_DIV_LK     4
#define NORMAL_POP        5
#define NORMAL_RECORD_LLK 6

#define NORMAL_OP(a,b,c,d)  ((a) | ((b) << 3) | ((c) << 6) | ((d) << 9))
#define NORMAL_LAST_OP(a)   ((a) << 12)
164 | 
165 | #endif
166 | 
167 | 
168 | 


--------------------------------------------------------------------------------