├── king ├── LongHash.cpp ├── MemoryInfo.h ├── MapFunction.h ├── TraitTransformations.h ├── WindowsHelper.h ├── Hash.h ├── Error.h ├── LongLongCounter.h ├── Intervals.h ├── KinshipX.h ├── Kinship.h ├── MathDeriv.h ├── Sort.h ├── MemoryAllocators.h ├── Davies.h ├── MemoryInfo.cpp ├── Input.h ├── MapFunction.cpp ├── MathSobol.h ├── Matings.h ├── diseaseGEE.h ├── LongLongCounter.cpp ├── QuickIndex.h ├── Error.cpp ├── WindowsHelper.cpp ├── MerlinSort.h ├── OLS.h ├── Genetics.h ├── rplot.h ├── PedigreeAlleleFreq.h ├── MathLu.h ├── KinshipX.cpp ├── PeelerNodes.h ├── OptimizerConstraints.h ├── MathConstant.h ├── MathMiser.h ├── Constant.h ├── GenotypeLists.h ├── MathSVD.h ├── MathGold.h ├── MathCholesky.h ├── TDT.h ├── GenotypeCompressor.h ├── InputFile.cpp ├── Matings.cpp ├── MathVegas.h ├── MathAssoc.h ├── LongArray.h ├── MathDeriv.cpp ├── BasicHash.h ├── VCLinear.h ├── Genetics.cpp ├── Kinship.cpp ├── MathStats.h ├── PedigreeFamily.h ├── PedigreeDescription.h ├── VCGEE.h ├── IBD.h ├── MathSobol.cpp ├── PeelerNodes.cpp ├── TraitTransformations.cpp ├── LongArray.cpp ├── FortranFormat.h ├── PedigreePerson.h ├── MiniDeflate.h ├── MemoryAllocators.cpp ├── Input.cpp ├── MathCholesky.cpp ├── MerlinSort.cpp ├── BasicHash.cpp ├── StringArray.h ├── InputFile.h ├── StringMap.h ├── Hash.cpp ├── IntArray.h ├── Random.h ├── MathLu.cpp ├── MathFloatVector.h ├── PedigreeAlleles.h ├── OptimizerConstraints.cpp ├── BrentC.cpp ├── MathGenMin.h └── MathNormal.h ├── libsvm ├── .gitignore ├── svm.def ├── Makefile ├── COPYRIGHT ├── tools │ ├── easy.py │ ├── checkdata.py │ └── subset.py └── svm.h ├── examples └── index │ ├── seq.batches.by.20.txt │ ├── README │ └── list.107.local.crams.index ├── topmed_variant_calling_overview.png ├── .gitignore ├── scripts ├── run-merge-sites-local.cmd ├── run-milk-local.cmd ├── run-union-sites-local.cmd ├── run-batch-genotype-local.cmd ├── d13-add-fmis-to-frz9.pl ├── run-discovery-local.cmd ├── run-paste-genotype-local.cmd ├── e05-whitelist-gwas-variants.pl 
└── e04-filter-vars.pl ├── .gitmodules ├── config.yml ├── Dockerfile └── singularity.def /king/LongHash.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /libsvm/.gitignore: -------------------------------------------------------------------------------- 1 | svm-predict 2 | svm-scale 3 | svm-train 4 | svm.o 5 | -------------------------------------------------------------------------------- /examples/index/seq.batches.by.20.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 21 3 | 41 4 | 61 5 | 81 6 | 101 7 | -------------------------------------------------------------------------------- /topmed_variant_calling_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/statgen/topmed_variant_calling/HEAD/topmed_variant_calling_overview.png -------------------------------------------------------------------------------- /king/MemoryInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEMORYINFO_H__ 2 | #define __MEMORYINFO_H__ 3 | 4 | #include "StringBasics.h" 5 | 6 | String & MemoryInfo(double bytes); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /king/MapFunction.h: -------------------------------------------------------------------------------- 1 | #ifndef __MAPFUNCTION_H__ 2 | #define __MAPFUNCTION_H__ 3 | 4 | double DistanceToRecombination(double distance); 5 | double RecombinationToDistance(double recombination); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /king/TraitTransformations.h: -------------------------------------------------------------------------------- 1 | #ifndef __TRAIT_TRANSFORMS__ 2 | #define __TRAIT_TRANSFORMS__ 3 | 4 | #include "Pedigree.h" 5 | 
6 | void InverseNormalTransform(Pedigree & ped); 7 | void InverseNormalTransform(Pedigree & ped, int trait); 8 | 9 | #endif 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /king/WindowsHelper.h: -------------------------------------------------------------------------------- 1 | #ifndef __WINDOWSHELPER_H__ 2 | #define __WINDOWSHELPER_H__ 3 | 4 | #ifndef __WIN32__ 5 | inline void WildCardArguments(int argc, char ** argv) { } 6 | #else 7 | void WildCardArguments(int & argc, char ** & argv); 8 | #endif 9 | 10 | #endif 11 | 12 | 13 | -------------------------------------------------------------------------------- /king/Hash.h: -------------------------------------------------------------------------------- 1 | #ifndef __HASH_H__ 2 | #define __HASH_H__ 3 | 4 | unsigned int hash ( const unsigned char * key, unsigned int length, unsigned int initval); 5 | 6 | unsigned int hash_no_case ( const unsigned char * key, unsigned int length, unsigned int initval); 7 | 8 | #endif 9 | 10 | -------------------------------------------------------------------------------- /king/Error.h: -------------------------------------------------------------------------------- 1 | #ifndef _ERROR_H_ 2 | #define _ERROR_H_ 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" { 6 | // #endif 7 | 8 | void error(const char * msg, ...); 9 | void warning(const char * msg, ...); 10 | void numerror(const char * msg, ...); 11 | 12 | // #ifdef __cplusplus 13 | // }; 14 | // #endif 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /king/LongLongCounter.h: -------------------------------------------------------------------------------- 1 | #ifndef __LONGLONGCOUNTER_H_ 2 | #define __LONGLONGCOUNTER_H_ 3 | 4 | #include "LongHash.h" 5 | 6 | class LongCounter : public LongHash 7 | { 8 | public: 9 | LongCounter(); 10 | 11 | void IncrementCount(long long key); 12 | void DecrementCount(long long key); 13 | int 
GetCount(long long key); 14 | }; 15 | 16 | #endif 17 | 18 | 19 | -------------------------------------------------------------------------------- /king/Intervals.h: -------------------------------------------------------------------------------- 1 | #ifndef __Intervals_h__ 2 | #define __Intervals_h__ 3 | #include "IntArray.h" 4 | 5 | double RoRP(IntArray &RP1, IntArray &RP2, IntArray &R1R2, IntArray &positionBP); 6 | double SegmentLength(IntArray &segs, IntArray &positionBP); 7 | double JoinLength(IntArray &A, IntArray &B, IntArray &positionBP); 8 | void SegmentIntersect(IntArray &A, IntArray &B, IntArray &C); 9 | void SegmentUnion(IntArray &A, IntArray &B, IntArray &C); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /king/KinshipX.h: -------------------------------------------------------------------------------- 1 | #ifndef __KINSHIPX_H__ 2 | #define __KINSHIPX_H__ 3 | 4 | #include "Pedigree.h" 5 | #include "MathMatrix.h" 6 | 7 | class KinshipX 8 | { 9 | public: 10 | Matrix allPairs; 11 | Family * fam; 12 | 13 | KinshipX() : allPairs() 14 | { fam = NULL; } 15 | 16 | void Setup(Family & f); 17 | 18 | double operator () (Person & p1, Person & p2); 19 | 20 | }; 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /king/Kinship.h: -------------------------------------------------------------------------------- 1 | #ifndef __KINSHIP_H__ 2 | #define __KINSHIP_H__ 3 | 4 | #include "Pedigree.h" 5 | #include "MathMatrix.h" 6 | 7 | class Kinship 8 | { 9 | public: 10 | Matrix allPairs; 11 | Family * fam; 12 | 13 | Kinship() : allPairs() 14 | { fam = NULL; } 15 | 16 | void Setup(Family & f); 17 | 18 | bool isInbred(); 19 | 20 | double operator () (Person & p1, Person & p2); 21 | 22 | }; 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /libsvm/svm.def: 
-------------------------------------------------------------------------------- 1 | LIBRARY libsvm 2 | EXPORTS 3 | svm_train @1 4 | svm_cross_validation @2 5 | svm_save_model @3 6 | svm_load_model @4 7 | svm_get_svm_type @5 8 | svm_get_nr_class @6 9 | svm_get_labels @7 10 | svm_get_svr_probability @8 11 | svm_predict_values @9 12 | svm_predict @10 13 | svm_predict_probability @11 14 | svm_free_model_content @12 15 | svm_free_and_destroy_model @13 16 | svm_destroy_param @14 17 | svm_check_parameter @15 18 | svm_check_probability_model @16 19 | svm_set_print_string_function @17 20 | -------------------------------------------------------------------------------- /king/MathDeriv.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATHDERIV_H__ 2 | #define __MATHDERIV_H__ 3 | 4 | #include "MathVector.h" 5 | 6 | // Evaluates the derivative of function func() at x, using h as an initial guess 7 | // stepsize. An estimate of the error in the derivative is stored in err. 8 | 9 | double dfunction(double (* func)(double), double x, double h, double & err); 10 | 11 | // Same as above, but without error estimate 12 | // 13 | 14 | double dfunction(double (* func)(double), double x, double h); 15 | 16 | #endif 17 | 18 | 19 | -------------------------------------------------------------------------------- /examples/index/README: -------------------------------------------------------------------------------- 1 | This directory contains the input file of the example TOPMed variant calling pipeline. 2 | 3 | The list.107.local.crams.index file contains 107 public samples (part of the 1000 Genomes Project) sequenced for the TOPMed project. The samples can be downloaded from a Google Cloud Storage bucket. (The location of the bucket will be added here. Contact hmkang@umich.edu in the meantime.) 4 | 5 | Also, the resource files are required to perform the example variant calling procedure. 
The files should be downloadable from ftp://share.sph.umich.edu/1000genomes/fullProject/hg38_resources/ -------------------------------------------------------------------------------- /king/Sort.h: -------------------------------------------------------------------------------- 1 | #ifndef __SORT_H__ 2 | #define __SORT_H__ 3 | 4 | #include "Constant.h" 5 | 6 | #include <stddef.h> 7 | 8 | void QuickSort(void *base, size_t nelem, size_t width, 9 | int (*cmp)(const void *, const void *)); 10 | 11 | void QuickSort2(void *base, void * base2, size_t nelem, size_t width, 12 | int (*cmp)(const void *, const void *)); 13 | 14 | void * BinarySearch(const void *key, const void *base, 15 | size_t nelem, size_t width, 16 | int (*cmp)(const void *, const void *)); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /king/MemoryAllocators.h: -------------------------------------------------------------------------------- 1 | #ifndef __MEMORY_ALLOCATORS_H__ 2 | #define __MEMORY_ALLOCATORS_H__ 3 | 4 | char ** AllocateCharMatrix(int rows, int cols); 5 | void FreeCharMatrix(char ** & matrix, int rows); 6 | 7 | float ** AllocateFloatMatrix(int rows, int cols); 8 | void FreeFloatMatrix(float ** & matrix, int rows); 9 | 10 | int ** AllocateIntMatrix(int rows, int cols); 11 | void FreeIntMatrix(int ** & matrix, int rows); 12 | 13 | char *** AllocateCharCube(int n, int rows, int cols); 14 | void FreeCharCube(char *** & matrix, int n, int rows); 15 | 16 | #endif 17 | 18 | -------------------------------------------------------------------------------- /king/Davies.h: -------------------------------------------------------------------------------- 1 | #ifndef __DAVIES_h__ 2 | #define __DAVIES_h__ 3 | //Davies R.B., Algorithm AS 155: The Distribution of a Linear Combination of chi-2 Random Variables, 4 | //Journal of the Royal Statistical Society. Series C (Applied Statistics), 29(3), p. 
323-333, (1980) 5 | //void Davies(double* lb1, double* nc1, int* n1, int *r1, double *sigma, double *c1, int *lim1, double *acc, double* trace, int* ifault, double *res); 6 | #include 7 | double Davies(double c1, double* lb1, int r1, int *n1=NULL, double *nc1=NULL, double sigma=0, int lim1=10000, double acc=0.0001); 8 | 9 | #endif 10 | 11 | -------------------------------------------------------------------------------- /libsvm/Makefile: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CFLAGS = -Wall -Wconversion -O3 -fPIC 3 | SHVER = 2 4 | 5 | all: svm-train svm-predict svm-scale 6 | 7 | lib: svm.o 8 | $(CXX) -shared -dynamiclib svm.o -o libsvm.so.$(SHVER) 9 | 10 | svm-predict: svm-predict.c svm.o 11 | $(CXX) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm 12 | svm-train: svm-train.c svm.o 13 | $(CXX) $(CFLAGS) svm-train.c svm.o -o svm-train -lm 14 | svm-scale: svm-scale.c 15 | $(CXX) $(CFLAGS) svm-scale.c -o svm-scale 16 | svm.o: svm.cpp svm.h 17 | $(CXX) $(CFLAGS) -c svm.cpp 18 | clean: 19 | rm -f *~ svm.o svm-train svm-predict svm-scale libsvm.so.$(SHVER) 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | htslib/tabix 35 | htslib/bgzip 36 | htslib/htsfile 37 | samtools/samtools 38 | bcftools/bcftools 39 | cramore/cramore 40 | vt-topmed/vt 41 | king/king 42 | king/*.o 43 | *.o 44 | libsvm/svm-train 45 | libsvm/svm-predict 46 | libsvm/svm-scale 47 | 
-------------------------------------------------------------------------------- /king/MemoryInfo.cpp: -------------------------------------------------------------------------------- 1 | #include "MemoryInfo.h" 2 | 3 | String & MemoryInfo(double bytes) 4 | { 5 | static String info; 6 | 7 | if (bytes < 1024) 8 | return info = "<1.0 kb"; 9 | 10 | if (bytes < 1024. * 1024.) 11 | info.printf("%.1f kb", (bytes + 1023) / 1024.); 12 | else if (bytes < 1024. * 1024. * 1024.) 13 | info.printf("%.1f mb", (bytes + 1024. * 1024. - 1) / (1024. * 1024.)); 14 | else if (bytes < 1024. * 1024. * 1024. * 1024.) 15 | info.printf("%.1f gb", bytes / (1024. * 1024. * 1024.)); 16 | else 17 | info.printf("%.1f tb", bytes / (1024. * 1024. * 1024. * 1024.)); 18 | 19 | return info; 20 | } 21 | -------------------------------------------------------------------------------- /king/Input.h: -------------------------------------------------------------------------------- 1 | #ifndef __INPUT_H__ 2 | #define __INPUT_H__ 3 | 4 | void Input(const char * prompt, int & n, int _default = 0); 5 | void Input(const char * prompt, double & d, double _default = 0.0); 6 | void Input(const char * prompt, char & c, char _default = 'A'); 7 | void Input(const char * prompt, char * s, char * _default = ""); 8 | void Input(const char * prompt, bool & b, bool _default); 9 | 10 | void InputBounds(const char * prompt, int & n, int min, int max, 11 | int _default = 0); 12 | void InputBounds(const char * prompt, double & d, double min, double max, 13 | double _default = 0); 14 | 15 | extern int InputPromptWidth; 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /king/MapFunction.cpp: -------------------------------------------------------------------------------- 1 | #include "MapFunction.h" 2 | #include "MathConstant.h" 3 | 4 | #include <math.h> 5 | 6 | double DistanceToRecombination(double distance) 7 | { 8 | return (1.0 - exp(-2.0 * distance)) * 0.5; 9 | } 10 | 11 | 
double RecombinationToDistance(double recombination) 12 | { 13 | return (log(max(1.0 - 2 * recombination, 1e-7)) * -0.5); 14 | } 15 | 16 | double KosambiDistanceToRecombination(double distance) 17 | { 18 | double e_to_4x = exp(4.0 * distance); 19 | 20 | return (0.5 * (e_to_4x - 1.0) / (e_to_4x + 1.0)); 21 | } 22 | 23 | double RecombinationToKosambiDistance(double theta) 24 | { 25 | return 0.25 * log((1.0 + 2*theta) / max(1.0 - 2.0*theta, 1e-7)); 26 | } 27 | -------------------------------------------------------------------------------- /king/MathSobol.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATH_SOBOL__ 2 | #define __MATH_SOBOL__ 3 | 4 | #include "IntArray.h" 5 | #include "MathVector.h" 6 | 7 | #define POLY_COUNT 36 8 | #define SOBOL_BITS 30 9 | #define SOBOL_FACTOR (1.0 / (1L << SOBOL_BITS)) 10 | 11 | class SobolSequence 12 | { 13 | public: 14 | IntArray * bits; 15 | IntArray x; 16 | int dim; 17 | long counter; 18 | 19 | SobolSequence(); 20 | ~SobolSequence(); 21 | 22 | void Init(int dimensions); 23 | Vector & Next(Vector & point); 24 | 25 | private: 26 | static int poly_integers[POLY_COUNT]; 27 | static int poly_degrees[POLY_COUNT]; 28 | }; 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /scripts/run-merge-sites-local.cmd: -------------------------------------------------------------------------------- 1 | # out : log/merge 2 | # list : BATCH : index/seq.batches.by.20.txt 3 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.10Mb.txt 4 | # var : ROOT : .. 
5 | # var : PREFIX : out/union/$BATCH$1$/b$BATCH$1$.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$ 6 | # target : $PREFIX$.merged.sites.bcf $PREFIX$.merged.sites.bcf.csi 7 | # name: example-merge 8 | mkdir -p out/union/$BATCH$1$/ 9 | cut -f 3 out/index/list.107.local.crams.vb_xy.index | tail -n +2 | tail -n +$BATCH$1$ | head -n 20 > $PREFIX$.bcflist.txt 10 | $ROOT$/cramore/cramore vcf-merge-candidate-variants --in-vcf-list $PREFIX$.bcflist.txt --region $INTERVAL$1$:$INTERVAL$2$-$INTERVAL$3$ --out-vcf $PREFIX$.merged.sites.bcf > $PREFIX$.merged.sites.bcf.out 2> $PREFIX$.merged.sites.bcf.err 11 | $ROOT$/bcftools/bcftools index $PREFIX$.merged.sites.bcf 12 | -------------------------------------------------------------------------------- /king/Matings.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATINGS_H__ 2 | #define __MATINGS_H__ 3 | 4 | #include "Pedigree.h" 5 | 6 | class Matings 7 | { 8 | public: 9 | // Number of distinct matings in the pedigree 10 | int matingCount; 11 | int founders; 12 | 13 | // Map linking each non-founder to a mating 14 | IntArray matingMap; 15 | 16 | // Index all the matings in a family 17 | void ListMatings(Family * family); 18 | 19 | // Lookup the mating index for a specific offspring 20 | int LookupMating(int serial); 21 | int LookupMating(Person & p); 22 | 23 | private: 24 | void InitializeHash(int size); 25 | int LookupMating(int father, int mother); 26 | 27 | IntArray hash; 28 | IntArray hashId; 29 | }; 30 | 31 | 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /king/diseaseGEE.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // diseaseGEE.h 3 | // Author: Wei-Min Chen 4 | // March 16, 2005 5 | 6 | #ifndef __diseaseGEE_H__ 7 | #define __diseaseGEE_H__ 8 | 9 | #include "Pedigree.h" 10 | #include "IntArray.h" 11 | #include 
"MathMatrix.h" 12 | #include "MathVector.h" 13 | #include "MathCholesky.h" 14 | #include "VCGEE.h" 15 | 16 | class GEE_DIS: public GEE{ 17 | // void constraint(void){} 18 | void RefreshD(int f); 19 | // void RefreshOD(int f){} 20 | public: 21 | // double OR[6]; 22 | // double rho[6]; // correlation between relative pair 23 | IntArray * diseases; 24 | int disease; 25 | IntArray mCovariate; 26 | Vector *resid; 27 | void solve(); 28 | GEE_DIS(Pedigree & pedigree); 29 | ~GEE_DIS(); 30 | void InitCoef(); 31 | void summary(){} 32 | void print(); 33 | }; 34 | 35 | #endif 36 | 37 | 38 | -------------------------------------------------------------------------------- /scripts/run-milk-local.cmd: -------------------------------------------------------------------------------- 1 | # out : log/milk 2 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.1Mb.txt 3 | # var : ROOT : .. 4 | # var : IN_PREFIX : out/genotypes/merged/$INTERVAL$1$/merged.$INTERVAL$1$_$INTERVAL$4$_$INTERVAL$5$ 5 | # var : OUT_PREFIX : out/milk/$INTERVAL$1$/milk.$INTERVAL$1$_$INTERVAL$4$_$INTERVAL$5$ 6 | # name : example-milk 7 | # target : $OUT_PREFIX$.full.vcf.gz $OUT_PREFIX$.sites.vcf.gz $OUT_PREFIX$.sites.vcf.gz.tbi 8 | mkdir -p out/milk/$INTERVAL$1$/ 9 | $ROOT$/vt-topmed/vt milk_filter -f out/genotypes/hgdp/merged.autosomes.gtonly.minDP0.hgdp.king.inferred.ped -b $IN_PREFIX$.genotypes.bcf -o $OUT_PREFIX$.full.vcf.gz -g $IN_PREFIX$.sex_map.txt --xLabel chrX --yLabel chrY --mtLabel chrM --xStart 2781479 --xStop 155701383 --af-field AF 10 | zcat $OUT_PREFIX$.full.vcf.gz | cut -f 1-8 | $ROOT$/htslib/bgzip -c > $OUT_PREFIX$.sites.vcf.gz 11 | $ROOT$/htslib/tabix -pvcf $OUT_PREFIX$.sites.vcf.gz 12 | -------------------------------------------------------------------------------- /king/LongLongCounter.cpp: -------------------------------------------------------------------------------- 1 | #include "LongLongCounter.h" 2 | 3 | LongCounter::LongCounter() : LongHash() 4 | { 5 | 
SetAllowDuplicateKeys(false); 6 | } 7 | 8 | void LongCounter::IncrementCount(long long key) 9 | { 10 | int slot = Find(key); 11 | 12 | if (slot == -1) 13 | Add(key, 1); 14 | else if (Object(slot) == -1) 15 | Delete(slot); 16 | else 17 | Object(slot)++; 18 | } 19 | 20 | void LongCounter::DecrementCount(long long key) 21 | { 22 | int slot = Find(key); 23 | 24 | if (slot == -1) 25 | Add(key, -1); 26 | else if (Object(slot) == 1) 27 | Delete(slot); 28 | else 29 | Object(slot)--; 30 | } 31 | 32 | int LongCounter::GetCount(long long key) 33 | { 34 | int slot = Find(key); 35 | 36 | if (slot == -1) 37 | return 0; 38 | else 39 | return Object(slot); // plain read: a lookup must not modify the stored count 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "apigenome"] 2 | path = apigenome 3 | url = https://github.com/hyunminkang/apigenome 4 | [submodule "bamUtil"] 5 | path = bamUtil 6 | url = https://github.com/statgen/bamUtil 7 | [submodule "libStatGen"] 8 | path = libStatGen 9 | url = https://github.com/statgen/libStatGen 10 | [submodule "invNorm"] 11 | path = invNorm 12 | url = https://github.com/hyunminkang/invNorm 13 | [submodule "cramore"] 14 | path = cramore 15 | url = https://github.com/hyunminkang/cramore 16 | [submodule "htslib"] 17 | path = htslib 18 | url = https://github.com/samtools/htslib 19 | [submodule "bcftools"] 20 | path = bcftools 21 | url = https://github.com/samtools/bcftools 22 | [submodule "samtools"] 23 | path = samtools 24 | url = https://github.com/samtools/samtools 25 | [submodule "libsvm"] 26 | path = libsvm 27 | url = https://github.com/cjlin1/libsvm 28 | [submodule "vt-topmed"] 29 | path = vt-topmed 30 | url = https://github.com/hyunminkang/vt-topmed 31 | -------------------------------------------------------------------------------- /king/QuickIndex.h: -------------------------------------------------------------------------------- 1 | 
#ifndef __QUICKINDEX_H__ 2 | #define __QUICKINDEX_H__ 3 | 4 | #include "MathVector.h" 5 | #include "StringArray.h" 6 | #include "IntArray.h" 7 | #include "StringMap.h" 8 | 9 | class QuickIndex : public IntArray 10 | { 11 | public: 12 | QuickIndex(); 13 | QuickIndex(const IntArray & source_data) 14 | { Index(source_data); } 15 | QuickIndex(const StringArray & source_data) 16 | { Index(source_data); } 17 | QuickIndex(const Vector & source_data) 18 | { Index(source_data); } 19 | 20 | void Index(const IntArray & source_data); 21 | void Index(const StringArray & source_data); 22 | void Index(const Vector & source_data); 23 | void IndexCounts(const StringIntMap & source_data); 24 | 25 | private: 26 | const void * source; 27 | int datatype; 28 | 29 | bool IsBefore(int i, int j); 30 | void Sort(); 31 | }; 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /king/Error.cpp: -------------------------------------------------------------------------------- 1 | #include "Error.h" 2 | 3 | #include "stdlib.h" 4 | #include "stdarg.h" 5 | #include "stdio.h" 6 | 7 | // Declare a dummy class to ensure that compilers recognize this as C++ code 8 | class String; 9 | 10 | void error ( const char * msg, ... ) 11 | { 12 | va_list ap; 13 | 14 | va_start(ap, msg); 15 | 16 | printf("\nFATAL ERROR - \n"); 17 | vprintf(msg, ap); 18 | printf("\n\n"); 19 | 20 | va_end(ap); 21 | 22 | exit(EXIT_FAILURE); 23 | } 24 | 25 | void warning ( const char * msg, ... ) 26 | { 27 | va_list ap; 28 | 29 | va_start(ap, msg); 30 | 31 | printf("\n\aWARNING - \n"); 32 | vprintf(msg, ap); 33 | printf("\n"); 34 | 35 | va_end(ap); 36 | } 37 | 38 | void numerror ( const char * msg , ... 
) 39 | { 40 | va_list ap; 41 | 42 | va_start(ap, msg); 43 | 44 | printf("\nFATAL NUMERIC ERROR - "); 45 | vprintf(msg, ap); 46 | printf("\n\n"); 47 | 48 | va_end(ap); 49 | 50 | exit(EXIT_FAILURE); 51 | } 52 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | exe_root: "/topmed_variant_calling" 2 | input_expression: "input/{sample_id}.cram" 3 | batch_size: 1000 4 | region_size: 10000000 5 | merge_region_size: 100000 6 | thresholds: 7 | vb_depth: 15 8 | freemix: 0.1 9 | frac_dp10: 0.9 10 | contigs: 11 | chr1: 248956422 12 | chr2: 242193529 13 | chr3: 198295559 14 | chr4: 190214555 15 | chr5: 181538259 16 | chr6: 170805979 17 | chr7: 159345973 18 | chr8: 145138636 19 | chr9: 138394717 20 | chr10: 133797422 21 | chr11: 135086622 22 | chr12: 133275309 23 | chr13: 114364328 24 | chr14: 107043718 25 | chr15: 101991189 26 | chr16: 90338345 27 | chr17: 83257441 28 | chr18: 80373285 29 | chr19: 58617616 30 | chr20: 64444167 31 | chr21: 46709983 32 | chr22: 50818468 33 | chrX: 156040895 34 | chrY: 57227415 35 | chrM: 16569 36 | autosome_contigs: [chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chr10, chr11, chr12, chr13, chr14, chr15, chr16, chr17, chr18, chr19, chr20, chr21, chr22] 37 | sample_ids: 38 | - "HG00096" 39 | - "HG00097" 40 | - "HG00099" 41 | -------------------------------------------------------------------------------- /king/WindowsHelper.cpp: -------------------------------------------------------------------------------- 1 | #include "WindowsHelper.h" 2 | #ifdef __WIN32__ 3 | #ifndef __GNUC__ 4 | #include <dir.h> 5 | 6 | void WildCardArguments(int & argc, char ** & argv) 7 | { 8 | if (argc < 2) return; 9 | 10 | int count = 0; 11 | for (int i = 1; i < argc; i++) 12 | { 13 | struct ffblk blk; 14 | 15 | int done = findfirst(argv[i], &blk, 0); 16 | while(!done) 17 | { 18 | done = findnext(&blk); 19 | count++; 20 | } 21 | } 22 | 23 | char 
** new_argv = new char * [count + 1]; 24 | int new_argc = 1; 25 | 26 | new_argv[0] = argv[0]; 27 | for (int i = 1; i < argc; i++) 28 | { 29 | struct ffblk blk; 30 | 31 | int done = findfirst(argv[i], &blk, 0); 32 | while (!done && new_argc <= count) 33 | { 34 | new_argv[new_argc++] = strdup(blk.ff_name); 35 | done = findnext(&blk); 36 | } 37 | } 38 | 39 | argc = new_argc; 40 | argv = new_argv; 41 | } 42 | 43 | #endif 44 | #endif 45 | 46 | -------------------------------------------------------------------------------- /scripts/run-union-sites-local.cmd: -------------------------------------------------------------------------------- 1 | # out : log/merge 2 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.10Mb.txt 3 | # var : ROOT : .. 4 | # var : PREFIX : out/union/union.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$ 5 | # target : $PREFIX$.sites.bcf $PREFIX$.sites.bcf.csi 6 | # name: example-union 7 | bash -c 'cat index/seq.batches.by.20.txt | xargs -I {} echo out/union/{}/b{}.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$.merged.sites.bcf > $PREFIX$.bcflist.txt' 8 | $ROOT$/cramore/cramore vcf-merge-candidate-variants --in-vcf-list $PREFIX$.bcflist.txt --region $INTERVAL$1$:$INTERVAL$2$-$INTERVAL$3$ --out-vcf $PREFIX$.merged.sites.bcf > $PREFIX$.merged.sites.bcf.out 2> $PREFIX$.merged.sites.bcf.err 9 | $ROOT$/bcftools/bcftools index -f $PREFIX$.merged.sites.bcf 10 | bash -c 'set -o pipefail; $ROOT$/vt-topmed/vt annotate_indels -r resources/ref/hs38DH.fa $PREFIX$.merged.sites.bcf -o + 2> $PREFIX$.annotated.sites.bcf.err | $ROOT$/vt-topmed/vt consolidate_variants + -o $PREFIX$.sites.bcf > $PREFIX$.sites.bcf.out 2> $PREFIX$.bcf.sites.err' 11 | $ROOT$/bcftools/bcftools index -f $PREFIX$.sites.bcf 12 | -------------------------------------------------------------------------------- /king/MerlinSort.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // 
merlin/MerlinSort.h 3 | // (c) 2000-2007 Goncalo Abecasis 4 | // 5 | // This file is distributed as part of the MERLIN source code package 6 | // and may not be redistributed in any form, without prior written 7 | // permission from the author. Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #ifndef __MERLINSORT_H__ 19 | #define __MERLINSORT_H__ 20 | 21 | #include "Pedigree.h" 22 | 23 | // This routine sorts families so that densely genotyped individuals 24 | // appear before those with more missing data, which empirically appears 25 | // to reduce the average size of gene flow trees 26 | // 27 | 28 | void SortFamilies(Pedigree & ped); 29 | 30 | #endif 31 | 32 | -------------------------------------------------------------------------------- /king/OLS.h: -------------------------------------------------------------------------------- 1 | #ifndef __OLS_h__ 2 | #define __OLS_h__ 3 | 4 | #include "Pedigree.h" 5 | #include "IntArray.h" 6 | #include "MathMatrix.h" 7 | #include "MathVector.h" 8 | #include "MathCholesky.h" 9 | 10 | class OLS_REGRESSION{ 11 | Matrix MatrixOne; 12 | Matrix L, Linverse; 13 | double Q; 14 | Matrix tMatrix; 15 | Vector tVector; 16 | public: 17 | // Input 18 | Vector Y; 19 | Matrix X; 20 | 21 | // Output 22 | int N; // sample size 23 | int P; // # covariates 24 | int testCount; 25 | StringArray covariateNames; 26 | int nuisanceCount; 27 | Vector beta; // regression coefficient 28 | Vector SE; 29 | Matrix Cov; 30 | double loglik; // log likelihood 31 | Vector t_statistic; 32 | Vector pvalue; 33 | Vector R2; // r-square: a Cov(X, Y) / Var(Y) 34 | // Vector R2_alt; // r-square: a^2 Var(X) / Var(Y) 35 | 
bool failure; 36 | 37 | OLS_REGRESSION(); 38 | void run(); 39 | void run(Vector y, Matrix X); 40 | void run(Vector y, Vector X); 41 | void Print(); 42 | void Print(const char* title); 43 | }; 44 | 45 | #endif 46 | 47 | 48 | -------------------------------------------------------------------------------- /scripts/run-batch-genotype-local.cmd: -------------------------------------------------------------------------------- 1 | # out : log/batch-geno 2 | # list : BATCH : index/seq.batches.by.20.txt 3 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.10Mb.txt 4 | # var : ROOT : .. 5 | # var : PREFIX : out/genotypes/batches/$BATCH$1$/b$BATCH$1$.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$ 6 | # name : example-batch-genotype 7 | # target : $PREFIX$.genotypes.bcf $PREFIX$.genotypes.bcf.csi 8 | mkdir -p out/genotypes/batches/$BATCH$1$/ 9 | cut -f 1,20 out/index/list.107.local.crams.vb_xy.index | tail -n +2 | tail -n +$BATCH$1$ | head -n 20 > $PREFIX$.sex_map.txt 10 | cut -f 1,2,5 out/index/list.107.local.crams.vb_xy.index | tail -n +2 | tail -n +$BATCH$1$ | head -n 20 > $PREFIX$.cram_index.txt 11 | bash -c 'set -o pipefail; REF_PATH=resources/ref/md5/%2s/%2s/%s $ROOT$/cramore/cramore dense-genotype --in-cram-list $PREFIX$.cram_index.txt --in-vcf out/union/union.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$.sites.bcf --unit 6000000 --region $INTERVAL$1$:$INTERVAL$2$-$INTERVAL$3$ --sex-map $PREFIX$.sex_map.txt --xLabel chrX --yLabel chrY --xStart 2781479 --xStop 155701383 --print-tmp-info --out $PREFIX$.genotypes.bcf --min-mq 1 > $PREFIX$.genotypes.bcf.out 2> $PREFIX$.genotypes.bcf.err' 12 | $ROOT$/bcftools/bcftools index -f $PREFIX$.genotypes.bcf 13 | -------------------------------------------------------------------------------- /king/Genetics.h: -------------------------------------------------------------------------------- 1 | #ifndef __GENETICS_H__ 2 | #define __GENETICS_H__ 3 | 4 | #include "Parameters.h" 5 | 6 | // Genetic models 7 | #define GM_FREE 0 8 | #define 
GM_RECESSIVE 1 9 | #define GM_ADDITIVE 2 10 | #define GM_DOMINANT 3 11 | 12 | // Constants for imprinting analysis 13 | #define I_NONE 0 14 | #define I_PATERNAL 1 15 | #define I_MATERNAL 2 16 | #define I_FULL 3 17 | #define I_IMPRINTING 4 18 | 19 | // Constants for special effects 20 | #define SFX_NONE 0 21 | #define SFX_PATERNAL 1 22 | #define SFX_MATERNAL 2 23 | 24 | class ImprintingParameter : public Parameter 25 | { 26 | public: 27 | ImprintingParameter(char c, char * desc, int & v) 28 | : Parameter(c, desc, &v) 29 | {} 30 | 31 | virtual void Status(); 32 | 33 | protected: 34 | virtual void Translate(char * value); 35 | }; 36 | 37 | class GeneticModelParameter : public Parameter 38 | { 39 | public: 40 | GeneticModelParameter(char c, char * desc, int & v) 41 | : Parameter(c, desc, &v) 42 | {} 43 | 44 | virtual void Status(); 45 | 46 | protected: 47 | virtual void Translate(char * value); 48 | }; 49 | 50 | #endif 51 | 52 | -------------------------------------------------------------------------------- /king/rplot.h: -------------------------------------------------------------------------------- 1 | #ifndef __rplot_h__ 2 | #define __rplot_h__ 3 | 4 | #include "IntArray.h" 5 | #include "MathVector.h" 6 | 7 | void plotMIerror(const char *prefix); 8 | void plotUniqueFamily(const char *prefix, int degree, const char *analysis); 9 | void plotDuplicate(const char *prefix); 10 | void plotBuild(const char *prefix); 11 | void plotSplitped(const char *prefix); 12 | void plotCluster(const char *prefix); 13 | void plotGenderError(const char *prefix, IntArray & plotx, Vector & ploty, IntArray & plotz, double xHeterozygosity, int gendererrorCount); 14 | void plotRelationship(const char *prefix); 15 | void plotIBDSeg(const char *prefix); 16 | void plotPopStructure(const char *prefix, int projectFlag); 17 | 18 | // not released yet 19 | void plotAUCmapping(const char *prefix, int SEXCHR); 20 | void plotNPL(const char *prefix, int SEXCHR); 21 | void plotHEreg(const char *prefix, 
int SEXCHR); 22 | void plotIBDmapping(const char *prefix, int SEXCHR); 23 | void plotROHmapping(const char *prefix, const char *stratName, int SEXCHR); 24 | void plotROHforQT(const char *prefix, int SEXCHR); 25 | void plotPopROH(const char *prefix, int SEXCHR); 26 | void plotPopDist(const char *prefix); 27 | void plotAncestry(const char *prefix); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /king/PedigreeAlleleFreq.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // libsrc/PedigreeAlleleFreq.h 3 | // (c) 2000-2007 Goncalo Abecasis 4 | // 5 | // This file is distributed as part of the MERLIN source code package 6 | // and may not be redistributed in any form, without prior written 7 | // permission from the author. Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 
14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #ifndef __ALLELEFREQUENCIES_H__ 19 | #define __ALLELEFREQUENCIES_H__ 20 | 21 | #include "Pedigree.h" 22 | 23 | int CountAlleles(Pedigree & ped, int marker); 24 | void LumpAlleles(Pedigree & ped, int marker, double threshold, bool reorder); 25 | 26 | #define FREQ_ALL 0 27 | #define FREQ_FOUNDERS 1 28 | #define FREQ_EQUAL 2 29 | 30 | // Returns true if frequencies estimated, false if previous information okay 31 | bool EstimateFrequencies(Pedigree & ped, int marker, int estimator); 32 | 33 | #endif 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /king/MathLu.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATH_LU__ 2 | #define __MATH_LU__ 3 | 4 | #include "MathMatrix.h" 5 | #include "MathVector.h" 6 | #include "IntArray.h" 7 | 8 | class LU 9 | { 10 | public: 11 | Matrix lu, inv; 12 | Vector x; 13 | IntArray permutation; 14 | double d; 15 | 16 | LU() : lu("LU.LU"), x("LU.x"), inv("LU.inv") { } 17 | ~LU(); 18 | 19 | // Given a square matrix a, decomposes a permutation of a 20 | // into an LU product, stored in LU as follows: 21 | // Lij = LUij when i > j; 1.0 when i == j; 0.0 otherwise 22 | // Uij = LUij when i <= j; 0.0 otherwise 23 | // permutation[1..n] records the permutation effected by 24 | // partial pivoting 25 | // d is output as +1 or -1 depending on whether the number 26 | // of row interchanges was even or odd 27 | // (for calculating determinants) 28 | void Decompose(Matrix & a); 29 | 30 | // Solves LU*X = B, taking b as the right hand side vector 31 | // and storing the solution in x. 
32 | void BackSubst(Vector & b); 33 | 34 | // Calculate matrix inverse by backsubstituting basis vectors 35 | void Invert(); 36 | 37 | // Calculate determinant 38 | double Determinant(); 39 | 40 | // Calculate log of determinant 41 | double lnDeterminant(); 42 | 43 | }; 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /king/KinshipX.cpp: -------------------------------------------------------------------------------- 1 | #include "KinshipX.h" 2 | 3 | void KinshipX::Setup(Family & f) 4 | { 5 | allPairs.Dimension(f.count, f.count); 6 | 7 | for (int i = 0; i < f.founders; i++) 8 | { 9 | bool isMale = f.ped[f.path[i]].sex == SEX_MALE; 10 | for (int j = 0; j < f.founders; j++) 11 | allPairs[i][j] = 0.0; 12 | allPairs[i][i] = isMale ? 1.0 : 0.5; 13 | } 14 | 15 | for (int i = f.founders; i < f.count; i++) 16 | { 17 | Person * p = &(f.ped[f.path[i]]); 18 | int k = p->father->traverse; 19 | int l = p->mother->traverse; 20 | 21 | bool isMale = f.ped[f.path[i]].sex == SEX_MALE; 22 | allPairs[i][i] = isMale ? 1.0 : 0.5 + allPairs[k][l] * 0.5; 23 | 24 | for (int j = 0; j < i; j++) 25 | if (!p->isMzTwin(f.ped[f.path[j]])) 26 | allPairs[i][j] = allPairs[j][i] = isMale ? 
27 | allPairs[l][j] : (allPairs[k][j] + allPairs[l][j]) * 0.5; 28 | else 29 | allPairs[j][i] = allPairs[i][j] = allPairs[i][i]; 30 | } 31 | 32 | fam = &f; 33 | } 34 | 35 | double KinshipX::operator() (Person & p1, Person & p2) 36 | { 37 | int i = p1.traverse; 38 | int j = p2.traverse; 39 | 40 | return allPairs[i][j]; 41 | } 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /king/PeelerNodes.h: -------------------------------------------------------------------------------- 1 | #ifndef __PEELERNODES_H__ 2 | #define __PEELERNODES_H__ 3 | 4 | #include "Pedigree.h" 5 | 6 | class PeelerNode 7 | { 8 | public: 9 | ~PeelerNode(); 10 | 11 | protected: 12 | static Vector scratch; 13 | }; 14 | 15 | class MatingNode; 16 | class PersonNode; 17 | 18 | class PersonNode : public PeelerNode 19 | { 20 | public: 21 | Person * person; 22 | 23 | IntArray states; 24 | Vector probabilities; 25 | 26 | void Clear(); 27 | 28 | void PeelDescendants(MatingNode * mating, double (*trans) (int, int, int)); 29 | void PeelAncestors(MatingNode * mating, double (*trans) (int, int, int)); 30 | 31 | double Probability() { return probabilities.Sum(); } 32 | }; 33 | 34 | class MatingNode : public PeelerNode 35 | { 36 | public: 37 | IntArray mstates, pstates; 38 | Vector probabilities; 39 | 40 | PersonNode * father; 41 | PersonNode * mother; 42 | 43 | void Initialize(PersonNode * father, PersonNode * mother); 44 | 45 | void PeelFather(); 46 | void PeelMother(); 47 | void PeelOffspring(PersonNode * child, double (*trans) (int, int, int)); 48 | 49 | double Probability() { return probabilities.Sum(); } 50 | }; 51 | 52 | #endif 53 | 54 | -------------------------------------------------------------------------------- /king/OptimizerConstraints.h: -------------------------------------------------------------------------------- 1 | #ifndef __OPTIMIZER_INTERFACE_H__ 2 | #define __OPTIMIZER_INTERFACE_H__ 3 | 4 | #include "MathVector.h" 5 | #include 
"IntArray.h" 6 | 7 | class ObjectiveFunction 8 | { 9 | public: 10 | virtual ~ObjectiveFunction() { }; 11 | 12 | virtual double Evaluate(Vector & v) = 0; 13 | }; 14 | 15 | class OptimizerInterface : public VectorFunc 16 | { 17 | public: 18 | virtual double Evaluate(Vector & v); 19 | 20 | void Dimension(int parameters); 21 | int CountFreeParameters(); 22 | int CountParameters(); 23 | 24 | void ClearConstraints(); 25 | 26 | void SetMin(int parameter, double min); 27 | void SetMax(int parameter, double max); 28 | void SetRange(int parameter, double min, double max); 29 | void Fix(int parameter, double value); 30 | void ClearConstraints(int parameter); 31 | 32 | void SetObjectiveFunction(ObjectiveFunction & f); 33 | 34 | void Translate(Vector & unconstrained, Vector & constrained); 35 | void BackTranslate(Vector & constrained, Vector & unconstrained); 36 | 37 | private: 38 | IntArray constraints; 39 | Vector min, max, point; 40 | 41 | ObjectiveFunction * f; 42 | }; 43 | 44 | 45 | #endif 46 | 47 | 48 | -------------------------------------------------------------------------------- /scripts/d13-add-fmis-to-frz9.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | 5 | #my $chr = $ARGV[0]; 6 | 7 | my $milkvcf = $ARGV[0]; #"fixed0/milk.filt/milk.chr$chr.merged.sites.vcf.gz"; 8 | my $fmisbcf = $ARGV[1]; #"fixed0/sites.update_info/merged.chr$chr.gtonly.minDP10.update_info.sites.bcf"; 9 | #my $out = "/dev/stdout" #"analysis/filt/ld/frz9/frz9.milk_nold.fmis10.chr$chr.vcf.gz"; 10 | 11 | open(BCF,"$ENV{'EXE_PREFIX'}/bcftools/bcftools view -H $fmisbcf |") || die "Cannot open file\n"; 12 | 13 | open(IN,"zcat $milkvcf |") || die "Cannot open file\n"; 14 | open(OUT,"| $ENV{'EXE_PREFIX'}/htslib/bgzip -c") || die "Cannot open file\n"; 15 | while(<IN>) { # one line of the MILK VCF per iteration 16 | print STDERR "Processing $. lines..\n" if ( $. 
% 1000000 == 0 ); 17 | if ( /^#/ ) { 18 | print OUT $_; 19 | if ( /ID=TRIO_CONC_THRES/ ) { 20 | print OUT "##INFO=<ID=FMIS10,Number=1,Type=Float,Description=\"FMIS value copied from the minDP10 update_info sites BCF\">\n"; # NOTE(review): header text was lost in extraction; ID matches the FMIS10 tag written below - confirm Number/Type/Description 21 | } 22 | } 23 | else { 24 | my @F = split; 25 | my @B = split(/[\t\r\n ]+/,<BCF>); # read the matching header-less BCF record 26 | next unless ( ( $F[1] eq $B[1] ) || ( $F[3] eq $B[3] ) || ( $F[4] eq $B[4] ) ); # NOTE(review): a BCF record has already been consumed here, so skipping cannot resync the two streams; confirm whether this should be a fatal all-fields (&&) check 27 | my ($fmis) = ( $B[7] =~ /;FMIS=(\S+)/ ); # list-context match; avoids the undefined "my ... if" form 28 | $F[7] =~ s/;MILK_LRE=/;FMIS10=$fmis;MILK_LRE=/; 29 | print OUT join("\t",@F)."\n"; 30 | } 31 | } 32 | close OUT; 33 | close IN; 34 | close BCF; 35 | 36 | #print `tabix -f -pvcf $out`; 37 | -------------------------------------------------------------------------------- /king/MathConstant.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATHCONSTANT_H__ 2 | #define __MATHCONSTANT_H__ 3 | 4 | #ifdef _MSC_VER 5 | #define _USE_MATH_DEFINES 6 | #endif 7 | 8 | #include "math.h" 9 | #include "stdlib.h" 10 | 11 | // Constants for numerical routines 12 | // 13 | 14 | #define TINY 1.0e-30 // A small number 15 | #define ITMAX 200 // Maximum number of iterations 16 | #define EPS 3.0e-7 // Relative accuracy 17 | #define ZEPS 3.0e-10 // Precision around zero 18 | #define FPMIN 1.0e-30 // Number near the smallest representable number 19 | #define FPMAX 1.0e+100 // Number near the largest representable number 20 | #define TOL 1.0e-6 // Zero SVD values below this 21 | #define GOLD 0.61803399 // Golden ratio 22 | #define CGOLD 0.38196601 // Complement of golden ratio 23 | 24 | inline double square(double a) { return a * a; } 25 | inline double sign(double a, double b) { return b >= 0 ? fabs(a) : -fabs(a); } 26 | inline double min(double a, double b) { return a < b ? a : b; } 27 | inline double max(double a, double b) { return a > b ? a : b; } 28 | 29 | inline int square(int a) { return a * a; } 30 | inline int sign(int a, int b) { return b >= 0 ? abs(a) : -abs(a); } 31 | inline int min(int a, int b) { return a < b ? a : b; } 32 | inline int max(int a, int b) { return a > b ? 
a : b; } 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /king/MathMiser.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATH_MISER__ 2 | #define __MATH_MISER__ 3 | 4 | #include "Random.h" 5 | #include "MathMatrix.h" 6 | #include "MathVector.h" 7 | #include "MathSobol.h" 8 | 9 | // Monte Carlo samples a user-supplied function in a rectangular volume 10 | // specified by region[2][dim]. A total of ncall calls are made to the function. 11 | // The integral of the function is returned in tgral and the standard 12 | // deviation of this estimate is in stdev. 13 | // 14 | 15 | struct MiserStack 16 | { 17 | int points; 18 | double weight; 19 | Matrix region; 20 | }; 21 | 22 | class MathMiser 23 | { 24 | public: 25 | SobolSequence sobol; 26 | 27 | long ncall; 28 | 29 | double tgral; 30 | double stdev; 31 | 32 | VectorFunc * vfunc; 33 | 34 | MathMiser() : sobol() 35 | { 36 | ncall = 1000; 37 | } 38 | 39 | double Integrate(Matrix & region); 40 | 41 | protected: 42 | // local variables for integration 43 | // are here... 
to save on new / delete 44 | // calls 45 | 46 | void RandomPoint(Matrix & region, Vector & point); 47 | 48 | double func (Vector & v) 49 | { return vfunc->Evaluate(v); } 50 | 51 | private: 52 | MiserStack stack[32]; // should be good for at least 2^31 points 53 | 54 | Vector midpoint, point, minl, minr, maxl, maxr; 55 | 56 | }; 57 | 58 | #endif 59 | 60 | -------------------------------------------------------------------------------- /king/Constant.h: -------------------------------------------------------------------------------- 1 | #ifndef _CONSTANT_H_ 2 | #define _CONSTANT_H_ 3 | 4 | #define COMPAREFUNC (int (*)(const void *, const void *)) 5 | 6 | #define BUFSIZE 1024 7 | #define FILENAMELEN 100 8 | #define IDLEN 20 9 | 10 | #define SEPARATORS " \t\n\r\f/" 11 | #define WHITESPACE " \t\n\r\f" 12 | 13 | #define SWTABLESKIP 9 14 | #define SWTABLEMAX 10000 15 | 16 | #define _NAN_ ((double) (6.66666e-66)) 17 | 18 | #define QTDTDATA "qtdt.dat" 19 | #define QTDTPED "qtdt.ped" 20 | #define QTDTIBD "qtdt.ibd" 21 | #define QTDTRAW "regress.tbl" 22 | #define GENIHDATAIN "genih.dat" 23 | 24 | #ifndef __WIN32__ 25 | #define stricmp strcasecmp 26 | #endif 27 | 28 | // Constants for older haplotype handling programs 29 | // Constants for HAPLOXT 30 | #define XT_MAX_ALLELES 50 // Maximum alleles for crosstabulation 31 | #define XT_VECTORSIZE 10000 // Total haplotypes in population 32 | #define XT_POOLTRESH 7 // Threshold for pooling rare alleles 33 | // Simwalk Haplotype Vectors 34 | #define HV_MAXSIZE 100 // Haplotypes in single SimWalk pedigree 35 | #define HV_INFOTRESH 75 // Percentage of loci typed 36 | #define HV_STATELENGTH 100 // Markers per haplotype 37 | #define HV_SKIPLINES 4 // lines to skip at bottom of family tree 38 | // Simwalk Summary Files 39 | #define HT_TABLE_SIZE 1000 40 | #define HT_SKIP_LINES 9 41 | 42 | #endif 43 | 44 | -------------------------------------------------------------------------------- /king/GenotypeLists.h: 
-------------------------------------------------------------------------------- 1 | #ifndef __GENOTYPE_ELIMINATION__ 2 | #define __GENOTYPE_ELIMINATION__ 3 | 4 | #include "Pedigree.h" 5 | 6 | class GenotypeList 7 | { 8 | public: 9 | 10 | IntArray allele1, allele2; 11 | IntArray alleles; 12 | 13 | bool ignore; 14 | int checked; 15 | 16 | GenotypeList(); 17 | 18 | static bool EliminateGenotypes(Pedigree & ped, Family * family, int marker); 19 | 20 | void Dimension(int genotypes); 21 | void Delete(int genotype); 22 | 23 | bool Matches(int genotype, int allele); 24 | bool Matches(int allele); 25 | 26 | int SaveGenotype(int genotype); 27 | void SetGenotype(int genotype, int al1, int al2); 28 | 29 | private: 30 | static void InitializeList(GenotypeList * list, Pedigree & p, Family * f, int marker); 31 | static bool PairwiseCheck(GenotypeList * list, Pedigree & p, Family * f); 32 | static bool FamilyCheck(GenotypeList * list, Pedigree & p, Family * f); 33 | 34 | static bool CheckTrio(GenotypeList * list, int fatid, int motid, int child, int i, int j, int k); 35 | static bool TrimParent(GenotypeList * list, Person & person, int fatid, int motid); 36 | static bool Cleanup(GenotypeList * list, Person & person, int fatid, int motid, int child, int geno); 37 | 38 | static void Print(GenotypeList * List, Pedigree & p, Family * f, int marker); 39 | }; 40 | 41 | 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /scripts/run-discovery-local.cmd: -------------------------------------------------------------------------------- 1 | # out : log/discover 2 | # list : SMIDX : index/list.107.local.crams.index 3 | # var : ROOT : .. 
4 | # target : out/sm/$SMIDX$1$/$SMIDX$1$.vb2 out/sm/$SMIDX$1$/$SMIDX$1$.norm.xy out/sm/$SMIDX$1$/$SMIDX$1$.bcf out/sm/$SMIDX$1$/$SMIDX$1$.bcf.csi 5 | # name: example-discovery 6 | mkdir -p out/sm/$SMIDX$1$/ 7 | bash -c 'set -o pipefail; REF_PATH=resources/ref/md5/%2s/%2s/%s $ROOT$/samtools/samtools view -uh -T resources/ref/hs38DH.fa $SMIDX$2$ 2> out/sm/$SMIDX$1$/$SMIDX$1$.bcf.samtools_err | $ROOT$/bamUtil/bin/bam clipoverlap --poolSize 100000000 --in -.ubam --out -.ubam 2> out/sm/$SMIDX$1$/$SMIDX$1$.bcf.bamUtil_err | $ROOT$/vt-topmed/vt discover2 -z -q 20 -b + -r resources/ref/hs38DH.fa -s $SMIDX$1$ -o out/sm/$SMIDX$1$/$SMIDX$1$.bcf 2> out/sm/$SMIDX$1$/$SMIDX$1$.bcf.vt_err' 8 | $ROOT$/bcftools/bcftools index -f out/sm/$SMIDX$1$/$SMIDX$1$.bcf 9 | REF_PATH=resources/ref/md5/%2s/%2s/%s $ROOT$/cramore/cramore cram-verify-bam --svd resources/ref/HGDP_938.b38.genotypes.svd --sam $SMIDX$2$ --cap-DP 100 --out out/sm/$SMIDX$1$/$SMIDX$1$.vb2 --num-PC 4 > out/sm/$SMIDX$1$/$SMIDX$1$.vb2.stdout 2> out/sm/$SMIDX$1$/$SMIDX$1$.vb2.stderr 10 | $ROOT$/cramore/cramore vcf-normalize-depth --xy --vcf out/sm/$SMIDX$1$/$SMIDX$1$.bcf --known resources/ref/1000G_omni2.5.b38.sites.PASS.vcf.gz --gc resources/ref/hs38DH.gc.w150.s5.gz --xLabel chrX --yLabel chrY --xStart 2781479 --xStop 155701383 --out out/sm/$SMIDX$1$/$SMIDX$1$.norm > out/sm/$SMIDX$1$/$SMIDX$1$.norm.out 2> out/sm/$SMIDX$1$/$SMIDX$1$.norm.err 11 | -------------------------------------------------------------------------------- /king/MathSVD.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATHSVD_H__ 2 | #define __MATHSVD_H__ 3 | 4 | #include "MathMatrix.h" 5 | #include "MathVector.h" 6 | #include "MathConstant.h" 7 | 8 | // SVD Decomposition 9 | // 10 | 11 | class SVD 12 | { 13 | // Given a matrix a[1..m][1..n] computes its singular value 14 | // decomposition, A = U*W*V^T. 15 | public: 16 | int m, n; // m - no. of rows, n - no. 
of parameters 17 | //Matrix u; // The matrix U 18 | double **u; // The matrix U 19 | Vector w; // The diagonal matrix of singular values, as vector w[1..n] 20 | double **v; // The matrix V (not the transpose V^T), 21 | //Matrix v; // The matrix V (not the transpose V^T) 22 | // is output as v[1..n][1..n] 23 | 24 | Vector x; // The solution vector after backsubstitution 25 | 26 | Matrix cov; // The covariance matrix for the parameters 27 | // obtained by the fit 28 | 29 | SVD(); 30 | ~SVD(); 31 | 32 | void Decompose(Matrix & a, int mp = -1, int np = -1); 33 | void Edit(double tol = TOL); 34 | void BackSubst(Vector & b); 35 | void Covariances(); 36 | 37 | double RSS(Matrix & M, Vector & b); // Residual Sum of Squares 38 | void Residuals(Matrix & M, Vector & b, Vector & delta); // Residuals 39 | 40 | protected: 41 | void Empty(); 42 | 43 | private: 44 | static double pythag(double a, double b); 45 | }; 46 | 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /king/MathGold.h: -------------------------------------------------------------------------------- 1 | #ifndef _MATHGOLD_H_ 2 | #define _MATHGOLD_H_ 3 | 4 | #include "MathConstant.h" 5 | #include "MathVector.h" 6 | 7 | // Minimizes functions of one variable in one dimension 8 | class ScalarMinimizer 9 | { 10 | public: 11 | double (*func)(double); // function to be minimized 12 | double a, b, c, min; 13 | double fa, fb, fc, fmin; 14 | 15 | ScalarMinimizer() { func = NULL; }; 16 | virtual ~ScalarMinimizer() { } 17 | 18 | virtual double f(double x); 19 | 20 | void Bracket(double a, double b); // bracket a minimum near a and b 21 | virtual double Brent(double tol = TOL); // return minimum, to precision TOL 22 | // result stored in min 23 | }; 24 | 25 | class LineMinimizer : public ScalarMinimizer 26 | // Minimizes f(P) along the line defined by P = point + x * line 27 | // Stores the best point (in point) along the line 28 | // and the displacement from the original (in line) 29 | { 30 | private: 31 | bool garbage; 32 | 
public: 33 | VectorFunc * func; // function to be minimized 34 | Vector line, point, temp; 35 | 36 | LineMinimizer(); 37 | LineMinimizer(VectorFunc & vfunc); 38 | LineMinimizer(double (*vfunc)(Vector & v)); 39 | 40 | virtual ~LineMinimizer() 41 | { if (garbage) delete func; } 42 | 43 | virtual double f(double x); 44 | 45 | virtual double Brent(double tol = TOL); 46 | }; 47 | 48 | #endif 49 | 50 | -------------------------------------------------------------------------------- /king/MathCholesky.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATH_CHOLESKY__ 2 | #define __MATH_CHOLESKY__ 3 | 4 | #include "MathMatrix.h" 5 | #include "MathVector.h" 6 | 7 | class Cholesky 8 | { 9 | public: 10 | Matrix L, inv; 11 | Vector x; 12 | 13 | Cholesky() : L("cholesky.L"), inv("cholesky.inverse"), x("cholesky.x") 14 | { } 15 | 16 | ~Cholesky() 17 | { } 18 | 19 | // Given a symmetric positive definite matrix A finds 20 | // a lower triangular matrix L such that L * transpose(L) = A 21 | // Only the upper triangle of A need be given 22 | void Decompose(Matrix & A); 23 | 24 | // If you call fast decompose the upper triangle of L is 25 | // undefined (as opposed to zero). This is often okay and 26 | // allows for a little more speed... 27 | void FastDecompose(Matrix & A); 28 | 29 | // Tries to decompose matrix A, returning true on success 30 | // or false on failure ... 
you should also check that 31 | // determinant is not zero before using results if this 32 | // is a concern 33 | bool TryDecompose(Matrix & A); 34 | 35 | // solve Y = X b 36 | void BackSubst(Vector & b); 37 | void BackSubst0(Vector & b); 38 | void Invert(); 39 | 40 | // determinant functions 41 | double lnDeterminantL(); 42 | double DeterminantL(); 43 | 44 | double lnDeterminant() 45 | { 46 | return 2 * lnDeterminantL(); 47 | } 48 | double Determinant() 49 | { 50 | double temp = DeterminantL(); 51 | return temp * temp; 52 | } 53 | }; 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | COPY . /topmed_variant_calling 4 | 5 | RUN apt-get update && apt-get install -y \ 6 | apt-utils \ 7 | automake \ 8 | autoconf \ 9 | build-essential \ 10 | git \ 11 | ghostscript \ 12 | gnuplot \ 13 | groff \ 14 | libcurl4-openssl-dev \ 15 | liblzma-dev \ 16 | libncurses5-dev \ 17 | libssl-dev \ 18 | libzstd-dev \ 19 | python3 \ 20 | r-base \ 21 | unzip \ 22 | wget \ 23 | zlib1g-dev 24 | 25 | RUN mkdir /tmp/plink && cd /tmp/plink && wget http://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20190617.zip && unzip plink_linux_x86_64_20190617.zip && cp plink /usr/local/bin/plink-1.9 26 | 27 | WORKDIR /topmed_variant_calling 28 | RUN rm -r /tmp/plink 29 | 30 | RUN git submodule init && git submodule update 31 | 32 | RUN cd libsvm/ && git clean -fdx && make && cd .. 33 | RUN cd apigenome && git clean -fdx && autoreconf -vfi && ./configure --prefix $PWD && make && make install && cd .. 34 | RUN cd libStatGen && git clean -fdx && make && cd .. 35 | RUN cd bamUtil && git clean -fdx && make && cd .. 36 | RUN cd invNorm && git clean -fdx && make && cd .. 37 | RUN cd htslib && git clean -fdx && autoheader && autoconf && ./configure && make && cd .. 38 | RUN cd vt-topmed && git clean -fdx && make && cd .. 
39 | RUN cd cramore && git clean -fdx && autoreconf -vfi && ./configure && make && cd .. 40 | RUN cd samtools && git clean -fdx && autoheader && autoconf -Wno-syntax && ./configure && make && cd .. 41 | RUN cd bcftools && git clean -fdx && make && cd .. 42 | RUN cd king && rm -f king *.o && g++ -O3 -c *.cpp && g++ -O3 -o king *.o -lz && cd .. 43 | 44 | -------------------------------------------------------------------------------- /libsvm/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2000-2011 Chih-Chung Chang and Chih-Jen Lin 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /king/TDT.h: -------------------------------------------------------------------------------- 1 | #ifndef __TDT_h__ 2 | #define __TDT_h__ 3 | 4 | class TDT:public AssociationAnalysis{ 5 | public: 6 | TDT(Pedigree & pedigree); 7 | ~TDT(); 8 | IntArray MT, MNT, pooT[2], pooNT[2]; 9 | void SetupGlobals(); 10 | void pre_genome(); 11 | void post_genome(); 12 | void PrintScores(); 13 | 14 | void Analyze(); 15 | void AnalyzeX(); 16 | 17 | void TDT_Preparation(); 18 | void TDT_Analysis(); 19 | void GEE_Analysis(); 20 | void WGDT_Analysis(); 21 | void TDT1P_Analysis(); 22 | void GDT_PO_Analysis(); 23 | void GDT_CP_Analysis(); 24 | void GDT_Missing_Analysis(); 25 | void PDT_Analysis(); 26 | void EPDT_Analysis(); 27 | void RDT_Analysis(); 28 | void GDT_Analysis(); 29 | void GDT_hetero_Analysis(); 30 | void FCAT_Analysis(); 31 | 32 | void GDT_AnalysisX(); 33 | void GDT_PO_AnalysisX(); 34 | 35 | int CheckTwin(); 36 | IntArray TwinFlag_Fam; 37 | IntArray TwinFlag_ID; 38 | void WrongQLS_Analysis(); 39 | void MQLS_Analysis(); 40 | 41 | char relationship(int i, int j); 42 | // String relationSet; 43 | 44 | // unreleased 45 | void QLS_Analysis(); 46 | void bQLS_Analysis(); 47 | void tQLS_Analysis(); 48 | void cQLS_Analysis(); 49 | void QLS_hetero_Analysis(); 50 | void LogisticScore_Analysis(); 51 | void GDT_FO_Analysis(); 52 | void GDT_MO_Analysis(); 53 | 54 | void 
rareGDT_Analysis(); 55 | void rareEDA_Analysis(); 56 | // void AutosomalCheck(); 57 | }; 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /king/GenotypeCompressor.h: -------------------------------------------------------------------------------- 1 | #ifndef __GENOTYPE_COMPRESSOR_H__ 2 | #define __GENOTYPE_COMPRESSOR_H__ 3 | 4 | #ifndef uchar 5 | #define uchar unsigned char 6 | #endif 7 | 8 | class GenotypeCompressor 9 | { 10 | public: 11 | static uchar * CompressGenotypes(uchar * genotypes, int n); 12 | static void RetrieveGenotypes(uchar * compressed, uchar * genotypes, int n); 13 | static char * Describe(uchar * compressed); 14 | 15 | static int MemoryAllocated(); 16 | static int MemoryInUse(); 17 | 18 | private: 19 | static uchar * memoryBlocks[1024]; 20 | static int blockIndex; 21 | static int blockByte; 22 | 23 | static void AllocateBlock(); 24 | static void AllocateMemory(int size); 25 | 26 | static uchar OddOneOut(uchar a, uchar b, uchar c); 27 | static uchar EncodeTriplet(uchar a, uchar b, uchar c); 28 | static void DecodeTriplet(uchar triplet, uchar & a, uchar & b, uchar & c); 29 | 30 | static void WRITEBIT(uchar * block, uchar & byte, uchar & mask, int bit) 31 | { 32 | if (bit) byte |= mask; 33 | mask *= 2; 34 | if (mask == 0) 35 | { 36 | block[blockByte++] = byte; 37 | mask = 1; 38 | byte = 0; 39 | } 40 | } 41 | 42 | static bool READBIT(uchar * & input, uchar & mask) 43 | { 44 | mask *= 2; 45 | 46 | if (mask == 0) mask = 1, input++; 47 | 48 | return *input & mask; 49 | } 50 | }; 51 | 52 | #endif 53 | 54 | 55 | -------------------------------------------------------------------------------- /king/InputFile.cpp: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // libsrc/InputFile.cpp 3 | // (c) 2000-2007 Goncalo Abecasis 4 | // 5 | // This file is distributed as part of the MERLIN source code 
package 6 | // and may not be redistributed in any form, without prior written 7 | // permission from the author. Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #include "InputFile.h" 19 | 20 | #ifdef __ZLIB_AVAILABLE__ 21 | 22 | IFILE::IFILE(const char * filename, const char * mode) 23 | { 24 | // Some implementations of zlib will not open files that are 25 | // larger than 2Gb. To ensure support for large (uncompressed) 26 | // files, we fall-back on the regular fopen when the initial 27 | // gzopen call fails and the filename does not end in .gz 28 | 29 | gzMode = true; 30 | gzHandle = gzopen(filename, mode); 31 | 32 | if (gzHandle == NULL) 33 | { 34 | int lastchar = 0; 35 | 36 | while (filename[lastchar] != 0) lastchar++; 37 | 38 | if (lastchar >= 3 && filename[lastchar - 3] == '.' 
&& 39 | filename[lastchar - 2] == 'g' && 40 | filename[lastchar - 1] == 'z') 41 | return; 42 | 43 | gzMode = false; 44 | handle = fopen(filename, mode); 45 | } 46 | }; 47 | 48 | #endif 49 | 50 | -------------------------------------------------------------------------------- /king/Matings.cpp: -------------------------------------------------------------------------------- 1 | #include "Matings.h" 2 | 3 | #define MATING_HASH_PADDING 2 /* Number of empty slots per mating, 4 | which are used to speed up searching 5 | at the cost of increased memory use */ 6 | 7 | void Matings::ListMatings(Family * family) 8 | { 9 | founders = family->founders; 10 | matingCount = 0; 11 | 12 | InitializeHash(family->count); 13 | 14 | for (int i = family->founders; i < family->nonFounders; i++) 15 | { 16 | Person & p = family->ped[family->path[i]]; 17 | 18 | matingMap[i - family->founders] = LookupMating(p.father->serial, p.mother->serial); 19 | } 20 | } 21 | 22 | void Matings::InitializeHash(int size) 23 | { 24 | size *= MATING_HASH_PADDING; 25 | 26 | hash.Dimension(size); 27 | hash.Set(-1); 28 | 29 | hashId.Dimension(size); 30 | hashId.Set(-1); 31 | } 32 | 33 | int Matings::LookupMating(int father, int mother) 34 | { 35 | int id = father * hash.Length() + mother; 36 | int h = father * MATING_HASH_PADDING; 37 | 38 | while (true) 39 | { 40 | if (hash[h] == -1) 41 | { 42 | hashId[h] = id; 43 | return hash[h] = matingCount++; 44 | } 45 | 46 | if (hashId[h] == id) 47 | return hash[h]; 48 | 49 | h++; 50 | 51 | if (h == hash.Length()) h = 0; 52 | } 53 | } 54 | 55 | int Matings::LookupMating(Person & p) 56 | { 57 | return LookupMating(p.serial - founders); 58 | } 59 | 60 | int Matings::LookupMating(int serial) 61 | { 62 | return matingMap[serial - founders]; 63 | } 64 | -------------------------------------------------------------------------------- /king/MathVegas.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATHVEGAS_H__ 2 | #define 
__MATHVEGAS_H__ 3 | 4 | #include "MathVector.h" 5 | #include "MathMatrix.h" 6 | #include "Random.h" 7 | 8 | #define ALPH 1.5 9 | #define NDMX 50 // Maximum number of increments for each axis 10 | #define MXDIM 10 // Maximum number of dimensions 11 | 12 | // Monte-carlo integration of user supplied ndimensional 13 | // function, in a rectangular volume specified by matrix 14 | // Volume[2][ndim], consisting of lower and upper bounds 15 | // itmx iterations each with about ncall function calls 16 | // The sampling grid is refined iteratively. Produces 17 | // the integral tgral, with standard deviation sd, and 18 | // an indicator of integrity chi2a (should be less than 1). 19 | class Vegas 20 | { 21 | public: 22 | int itmx, ncall; 23 | double tgral, sd, chi2a; 24 | static Random rand; 25 | VectorFunc * vfunc; 26 | 27 | Vegas(); 28 | ~Vegas(); 29 | 30 | double func(Vector & point) 31 | { return vfunc->Evaluate(point); } 32 | 33 | // Three levels of initialization possible 34 | // 0 - Total reset 35 | // 1 - Keep Grid, clear Estimates 36 | // 2 - Keep Grid and Estimates 37 | // 3 - Do additional iterations, no changes 38 | void Init(Matrix & Volume, int level = 0); 39 | 40 | // Integrate the function 41 | double Integrate(Matrix & Volume); 42 | 43 | private: 44 | void Rebin(double rc, Vector & xi); 45 | 46 | int mds, nd, ndo, ng, npg, * ia, * kg; 47 | double calls, dv2g, dxg, rc; 48 | double wgt, xjac, xn, xnd, xo, schi, si, swgt; 49 | Vector dt, dx, r, x, xin; 50 | Matrix d, di, xi; 51 | }; 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /king/MathAssoc.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATH_ASSOC_H__ 2 | #define __MATH_ASSOC_H__ 3 | 4 | #include "MathVector.h" 5 | 6 | // Measures of association based on chi-sq 7 | // 8 | 9 | class AssocChi 10 | { 11 | public: 12 | double sum; // values scored N 13 | double chisq; // chi-square value 14 | double df; 
// degrees freedom df 15 | double prob; // significance level p 16 | double lop; // -log10 of significance 17 | double cramrv; // between 0 and 1 - Cramer's V 18 | double ccc; // measure of association - depends on I and J 19 | 20 | int isValid; 21 | 22 | AssocChi(); 23 | 24 | void Calc(int ** nn, int ni, int nj); 25 | }; 26 | 27 | // Measures of Association based on entropy 28 | // 29 | 30 | class AssocEntropy 31 | { 32 | public: 33 | double sum; // values scored N 34 | double h; // entropy of whole table 35 | double hx; // entropy of the x distribution 36 | double hy; // entropy of the y distribution 37 | double hygx; // entropy of y given x 38 | double hxgy; // entropy of x given y 39 | double uygx; // dependency of x on y 40 | double uxgy; // dependency of y on x 41 | double uxy; // symmetrical dependency of x and y 42 | 43 | int isValid; 44 | 45 | AssocEntropy(); 46 | 47 | void Calc(int **nn, int ni, int nj); 48 | }; 49 | 50 | // Spearman's Rank Correlation 51 | void Spearman(Vector & v1, Vector & v2, 52 | double & rankD, double & zD, double & probD, 53 | double & spearmanR, double & probR); 54 | 55 | #endif 56 | 57 | 58 | -------------------------------------------------------------------------------- /king/LongArray.h: -------------------------------------------------------------------------------- 1 | #ifndef __LONGINTARRAY_H__ 2 | #define __LONGINTARRAY_H__ 3 | 4 | #include "LongInt.h" 5 | 6 | class LongArray 7 | { 8 | private: 9 | longint * items; 10 | int size, count; 11 | 12 | void Grow(int new_size); 13 | static int Compare(int * a, int * b); 14 | 15 | public: 16 | static int alloc; 17 | 18 | LongArray(int start_size = 0); 19 | LongArray(LongArray & source); 20 | ~LongArray(); 21 | 22 | LongArray & operator = (const LongArray & rhs); 23 | 24 | longint & operator [] (int index) { return items[index]; } 25 | 26 | int Append(longint value); 27 | void Push(longint value) { Append(value); } 28 | longint Pop() { return items[--count]; } 29 | longint Peek() 
#define MAXROUNDS    20
#define SQRT_HALF    (1.0/M_SQRT2)
#define TWO          (M_SQRT2 * M_SQRT2)

// Symmetric difference quotient of func around x with half-width h
static double centralDifference(double (* func)(double), double x, double h)
   {
   return ((*func)(x + h) - (*func)(x - h)) / (2.0 * h);
   }

// Numerical derivative of func at x by extrapolating central differences
// taken at successively smaller step sizes.
//
//   func   function to differentiate
//   x      point at which the derivative is wanted
//   h      initial step size; reduced by a factor of sqrt(2) each round
//   err    on return, the smallest error estimate seen, i.e. the one
//          attached to the returned value
//
// Returns the tableau entry with the smallest error estimate.
double dfunction(double (* func)(double), double x, double h, double & err)
   {
   double tableau[MAXROUNDS][MAXROUNDS];

   // Crude first estimate, paired with a deliberately huge error so that
   // any extrapolated value replaces it
   tableau[0][0] = centralDifference(func, x, h);
   double best = tableau[0][0];
   err = 1e30;

   for (int round = 1; round < MAXROUNDS; round++)
      {
      // New column: same formula, smaller step
      h *= SQRT_HALF;
      tableau[0][round] = centralDifference(func, x, h);

      // Fill the column with extrapolations of increasing order
      double weight = TWO;

      for (int order = 1; order <= round; order++)
         {
         double previous = tableau[order - 1][round];
         double older    = tableau[order - 1][round - 1];
         double refined  = (previous * weight - older) / (weight - 1.0);

         tableau[order][round] = refined;
         weight *= TWO;

         // Error estimate: largest disagreement with the two entries the
         // new value was built from
         double estimate = max(fabs(refined - previous), fabs(refined - older));

         if (estimate < err)
            {
            err = estimate;
            best = refined;
            }
         }

      // Give up once the highest order estimates start drifting apart
      if (fabs(tableau[round][round] - tableau[round - 1][round - 1]) >= 2.0 * err)
         break;
      }

   return best;
   }

// Convenience overload for callers that do not need the error estimate
double dfunction(double (* func)(double), double x, double h)
   {
   double ignored;

   return dfunction(func, x, h, ignored);
   }
50 | while (objects[h] != NULL && keys[h] != key) 51 | h = (h + 1) & mask; 52 | 53 | return h; 54 | } 55 | 56 | unsigned int ReIterate(unsigned int key, unsigned int h) const 57 | { 58 | h = (h + 1) & mask; 59 | 60 | while (objects[h] != NULL && keys[h] != key) 61 | h = (h + 1) & mask; 62 | 63 | return h; 64 | } 65 | }; 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /king/VCLinear.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // VCLINEAR.h 3 | // Author: Wei-Min Chen 4 | // May 13, 2005 5 | 6 | #ifndef _VC_LINEAR_H_ 7 | #define _VC_LINEAR_H_ 8 | 9 | #include "Pedigree.h" 10 | #include "IntArray.h" 11 | #include "MathMatrix.h" 12 | #include "MathVector.h" 13 | #include "MathCholesky.h" 14 | #include "VCGEE.h" 15 | 16 | class GEEVC_LINEAR:public GEE{ 17 | protected: 18 | virtual void RefreshOD(int f); 19 | public: 20 | double H2; 21 | double seH2; 22 | double totalVariance; 23 | double stat; 24 | double LOD; 25 | double pvalue; 26 | IntArray personValid; 27 | 28 | Matrix * varianceComponents; 29 | Matrix PhiX; 30 | Matrix PhiM; 31 | GEEVC_LINEAR(Pedigree & pedigree); 32 | ~GEEVC_LINEAR(); 33 | 34 | void init(); 35 | void InitCoef(); 36 | virtual void summary(); 37 | void print(); 38 | double residual(int p); 39 | 40 | int trait; 41 | IntArray mCovariate; 42 | }; 43 | 44 | class POLY:public GEEVC_LINEAR{ 45 | protected: 46 | void RefreshO(int f); 47 | void RefreshOD(int f); 48 | int StopRule(); 49 | public: 50 | void InitCoef(); 51 | POLY(Pedigree & pedigree):GEEVC_LINEAR(pedigree){} 52 | ~POLY(){} 53 | }; 54 | 55 | class GEEVC_LINKAGE:public GEEVC_LINEAR{ 56 | protected: 57 | void RefreshO(int f); 58 | public: 59 | double h2; 60 | Vector *ibd; 61 | void InitCoef(); 62 | void summary(); 63 | GEEVC_LINKAGE(Pedigree & pedigree):GEEVC_LINEAR(pedigree){ibd=NULL;} 64 | ~GEEVC_LINKAGE(){if(ibd) delete 
[]ibd;} 65 | }; 66 | 67 | class GEEVC_ASSOC:public GEEVC_LINKAGE{ 68 | public: 69 | Vector IBS; 70 | void InitCoef(); 71 | void summary(); 72 | GEEVC_ASSOC(Pedigree & pedigree):GEEVC_LINKAGE(pedigree){/*IBS=new Vector[ped.familyCount];*/} 73 | ~GEEVC_ASSOC(){/*if(IBS) delete []IBS;*/} 74 | }; 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /king/Genetics.cpp: -------------------------------------------------------------------------------- 1 | #include "Genetics.h" 2 | #include "Error.h" 3 | 4 | #include 5 | 6 | void ImprintingParameter::Status() 7 | { 8 | char * msg; 9 | 10 | switch (* (int *) var) 11 | { 12 | case I_NONE: msg = "NOT MODELLED"; break; 13 | case I_MATERNAL: msg = "MATERNAL"; break; 14 | case I_PATERNAL: msg = "PATERNAL"; break; 15 | case I_FULL: msg = "FULLY MODELLED"; break; 16 | case I_IMPRINTING: msg = "TEST IMPRINTING"; break; 17 | } 18 | 19 | printf("%30s : %15s (-%c[+|-|f|i|m|p])\n", description, msg, ch); 20 | } 21 | 22 | void ImprintingParameter::Translate(char * value) 23 | { 24 | switch (tolower(*value)) 25 | { 26 | case '-' : * (int *) var = I_NONE; break; 27 | case 'm' : * (int *) var = I_MATERNAL; break; 28 | case 'p' : * (int *) var = I_PATERNAL; break; 29 | case 'f' : 30 | case '+' : 31 | case 0 : * (int *) var = I_FULL; break; 32 | case 'i' : * (int *) var = I_IMPRINTING; break; 33 | default : warning("unknown parameter %c%s\n", ch, value); 34 | }; 35 | } 36 | 37 | void GeneticModelParameter::Status() 38 | { 39 | char * msg; 40 | 41 | switch (* (int *) var) 42 | { 43 | case GM_FREE: msg = "FREE"; break; 44 | case GM_RECESSIVE: msg = "RECESSIVE"; break; 45 | case GM_ADDITIVE: msg = "ADDITIVE"; break; 46 | case GM_DOMINANT: msg = "DOMINANT"; break; 47 | } 48 | 49 | printf("%30s : %15s (-%c[a|d|f|r])\n", description, msg, ch); 50 | } 51 | 52 | void GeneticModelParameter::Translate(char * value) 53 | { 54 | switch (tolower(*value)) 55 | { 56 | case 'a' : * (int *) var = GM_ADDITIVE; 
break; 57 | case 'd' : * (int *) var = GM_DOMINANT; break; 58 | case 'f' : * (int *) var = GM_FREE; break; 59 | case 'r' : * (int *) var = GM_RECESSIVE; break; 60 | default : warning("unknown parameter %c%s\n", ch, value); 61 | }; 62 | } 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /king/Kinship.cpp: -------------------------------------------------------------------------------- 1 | #include "Kinship.h" 2 | 3 | #define MAX_TABLE 850 4 | 5 | void Kinship::Setup(Family & f) 6 | { 7 | int count = f.count > MAX_TABLE ? MAX_TABLE : f.count; 8 | int founders = f.founders > MAX_TABLE ? MAX_TABLE : f.founders; 9 | 10 | allPairs.Dimension(count, count); 11 | 12 | for (int i = 0; i < founders; i++) 13 | { 14 | for (int j = 0; j < founders; j++) 15 | allPairs[i][j] = 0.0; 16 | allPairs[i][i] = 0.5; 17 | } 18 | 19 | for (int i = founders; i < count; i++) 20 | { 21 | Person * p = &(f.ped[f.path[i]]); 22 | int k = p->father->traverse; 23 | int l = p->mother->traverse; 24 | 25 | for (int j = 0; j < i; j++) 26 | if (!p->isMzTwin(f.ped[f.path[j]])) 27 | allPairs[i][j] = allPairs[j][i] = 28 | (allPairs[k][j] + allPairs[l][j]) * 0.5; 29 | else 30 | allPairs[j][i] = allPairs[i][j] = 0.5 + allPairs[k][l] * 0.5; 31 | 32 | allPairs[i][i] = 0.5 + allPairs[k][l] * 0.5; 33 | } 34 | 35 | fam = &f; 36 | } 37 | 38 | double Kinship::operator() (Person & p1, Person & p2) 39 | { 40 | int i = p1.traverse; 41 | int j = p2.traverse; 42 | 43 | if (i >= MAX_TABLE || j >= MAX_TABLE) 44 | { 45 | if (p1.isFounder() && p2.isFounder()) 46 | return 0.0; 47 | 48 | if (i == j || p1.isMzTwin(p2)) 49 | return 0.5 + (*this)(*p1.father, *p1.mother) * 0.5; 50 | 51 | if (i < j) 52 | return 0.5 * ((*this)(*p2.father, p1) + (*this)(*p2.mother, p1)); 53 | else 54 | return 0.5 * ((*this)(*p1.father, p2) + (*this)(*p1.mother, p2)); 55 | } 56 | 57 | return allPairs[i][j]; 58 | } 59 | 60 | bool Kinship::isInbred() 61 | { 62 | for (int i=0; i < allPairs.rows; i++) 63 
| if (allPairs[i][i] != 0.5) 64 | return true; 65 | 66 | for (int i=allPairs.rows; i < fam->count; i++) 67 | if ((*this)(fam->ped[fam->path[i]], fam->ped[fam->path[i]]) != 0.5) 68 | return true; 69 | 70 | return false; 71 | } 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /king/MathStats.h: -------------------------------------------------------------------------------- 1 | #ifndef _MATHSTATS_H_ 2 | #define _MATHSTATS_H_ 3 | 4 | #include "MathVector.h" 5 | #include "MathMatrix.h" 6 | 7 | // Normal distribution functions 8 | // 9 | double ndist (double x, bool upper = true); 10 | 11 | // ninv(p) calculates X such that p = P(x >= X) for std normal dist 12 | // 13 | double ninv ( double p ); 14 | 15 | // Chi-Sq distribution function 16 | // P(Chi>=X) for v degrees of freedom 17 | // 18 | double chidist(double x, double v); 19 | double chidist(double x, double v, double ncp); 20 | 21 | // F distribution function 22 | // P(F>=x) for v1 and v2 degrees freedom 23 | // 24 | double fdist(double x, double v1, double v2); 25 | 26 | // P(T>=x) for v degrees freedom 27 | double tdist(double x, double v); 28 | 29 | // Gamma distribution utility functions 30 | // (required for the chi-sq distribution) 31 | // 32 | 33 | double erff (double x); // the error function 34 | double erffc(double x); // the complementary error function 35 | double erfcc(double x); // heuristic version of erffc 36 | double gammln ( double xx ); // return the value of ln ( gamma ( xx ) ) | xx > 0 37 | double gammp ( double a, double x); // return the incomplete gamma function P(a,x) 38 | double gammq ( double a, double x); // return the incomplete gamma function Q(a,x) = 1 - P(a,x) 39 | 40 | // Estimates P(a,x) by its series representation and gammln(a) 41 | void gser ( double * gamser, double a, double x, double * gln); 42 | // Estimates Q(a,x) by its continued fraction representation and gammln(a) 43 | void gcf ( double * gammcf, double a, double x, 
// Describes one family within a Pedigree: its identifier, the range of
// members it covers, summary counts and a traversal order for its members.
class Family
   {
   public:
      Pedigree & ped;          // pedigree this family belongs to
      String   famid;          // family identifier
      int      serial;
      int      first, last;    // sentinel family members
      int      count;          // number of individuals in pedigree
      int      founders;       // number of founders in pedigree
      int      nonFounders;    // number of non-founders in pedigree
      int      mzTwins;        // number of MZ twins, excluding 1st twin in set
      int      * path;         // traverses the pedigree so that ancestors
                               // precede their descendants

      int      generations;    // Rough classification as:
                               //  1 -- all individuals are unrelated
                               //  2 -- two generations (inc. multiple couples)
                               //  3 -- three or more generations

      // True for a two-generation family with exactly two founders
      bool   isNuclear()
         { return (generations == 2) && (founders == 2); }

      // NOTE(review): top and bottom presumably initialize first and last --
      // confirm against the implementation.
      Family(Pedigree & ped, int top, int bottom, int serial = 0);
      ~Family();

      // NOTE(review): judging by the name, returns the number of connected
      // groups and optionally records each member's group -- confirm.
      int  ConnectedGroups(IntArray * groupMembership = NULL);

   private:
      void ShowInvalidCycles();

      // Declared private, so code outside the class cannot assign families
      Family & operator = (Family & rhs);
//      void Mark(int who, int group, IntArray * stack, IntArray & group_id );
   };
Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #ifndef __PEDDESCRIBE_H__ 19 | #define __PEDDESCRIBE_H__ 20 | 21 | #include "PedigreeGlobals.h" 22 | #include "PedigreePerson.h" 23 | #include "StringArray.h" 24 | #include "IntArray.h" 25 | 26 | #include 27 | 28 | // Possible pedigree columns 29 | #define pcSkip 0 30 | #define pcMarker 1 31 | #define pcTrait 2 32 | #define pcAffection 3 33 | #define pcCovariate 4 34 | #define pcZygosity 5 35 | #define pcEnd 6 36 | 37 | // Undocumented pedigree column types -- not recommended 38 | #define pcUndocumentedTraitCovariate 1001 39 | 40 | class PedigreeDescription : public PedigreeGlobals 41 | { 42 | public: 43 | int columnCount; 44 | IntArray columns, columnHash; 45 | 46 | PedigreeDescription(); 47 | ~PedigreeDescription(); 48 | 49 | void Load(IFILE & Input, bool warnIfLinkage = false); 50 | void Load(const char * filename, bool warnIfLinkage = false); 51 | 52 | void LoadLinkageDataFile(IFILE & input); 53 | void LoadLinkageDataFile(const char * filename); 54 | 55 | void LoadMendelDataFile(IFILE & input); 56 | void LoadMendelDataFile(const char * filename); 57 | 58 | void LoadMap(IFILE & Input); 59 | void LoadMap(const char * filename); 60 | 61 | PedigreeDescription & operator = (PedigreeDescription & rhs); 62 | 63 | int CountTextColumns(); 64 | 65 | // returns a string summarizing column contents 66 | const char * ColumnSummary(String & string); 67 | 68 | // Flag specifying Mendel format 69 | bool mendelFormat; 70 | 71 | String filename; 72 | 73 | private: 74 | int ReadLineHelper(IFILE & input, String & buffer, StringArray & tokens); 75 | 76 | int CountColumns(int type); 77 | 
void UpdateSummary(String & string, int type, const char * label); 78 | }; 79 | 80 | #endif 81 | 82 | 83 | -------------------------------------------------------------------------------- /king/VCGEE.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // GEE.h 3 | // Author: Wei-Min Chen 4 | // Oct 10, 2004 5 | 6 | #ifndef __VCGEE_H__ 7 | #define __VCGEE_H__ 8 | 9 | #include "Pedigree.h" 10 | #include "IntArray.h" 11 | #include "MathMatrix.h" 12 | #include "MathVector.h" 13 | #include "MathCholesky.h" 14 | 15 | class GEE{ 16 | protected: 17 | Vector delta; 18 | Vector delta2; 19 | Matrix W2, B[5]; 20 | 21 | virtual void GetPhi(int f); 22 | 23 | Vector SEvariances, SEvariances_R; 24 | 25 | Matrix DVD; 26 | Vector SEcoef, SEcoef_R; 27 | Matrix CovCoef; 28 | Matrix CovCoef_R; 29 | Cholesky chol; 30 | Matrix D; // D for GEE 31 | Matrix Omega; // variace-covariance matrix of trait 32 | Matrix OmegaInv; 33 | Matrix *OD; 34 | Matrix Phi; 35 | Matrix Delta; 36 | int parCount; // number of variance components 37 | int coefCount; // number of regression coefficients 38 | int size; // size of score 39 | 40 | inline int Index(int u, int v){ 41 | if(u==v) return u; 42 | else if(u 7 | 8 | class IBD 9 | { 10 | public: 11 | double p0, p1, p2; 12 | 13 | IBD() 14 | { p0 = p1 = p2 = 0.0; } 15 | IBD(double zero, double one, double two) 16 | { p0 = zero; p1 = one; p2 = two; } 17 | 18 | void defaultSib() 19 | { p0 = p2 = 0.25; p1 = 0.5; } 20 | 21 | void defaultSelf() 22 | { p0 = p1 = 0; p2 = 1.0; } 23 | 24 | void defaultUnrelated() 25 | { p0 = 1.0; p1 = p2 = 0.0; } 26 | 27 | void defaultFounderOffspring() 28 | { p0 = p2 = 0.0; p1 = 1.0; } 29 | 30 | double expected() 31 | { return 0.5 * p1 + p2; } 32 | 33 | bool isValid() 34 | { return (p0 + p1 + p2) == 1.0; } 35 | 36 | IBD & operator = (IBD & rhs) 37 | { p0 = rhs.p0; 38 | p1 = rhs.p1; 39 | p2 = rhs.p2; 40 | return (*this); } 41 
| 42 | bool operator == (IBD & rhs); 43 | bool operator != (IBD & rhs); 44 | 45 | IBD * SimpleIBD(int marker, Person & p1, Person & p2); 46 | }; 47 | 48 | struct IBDKey 49 | { 50 | int serialLo; 51 | int serialHi; 52 | 53 | void SelectPair(Person & p1, Person & p2); 54 | }; 55 | 56 | struct IBDPair 57 | { 58 | int serialLo; 59 | int serialHi; 60 | IBD ibd; 61 | 62 | void Assign(IBDKey & key, IBD & i) 63 | { 64 | serialLo = key.serialLo; 65 | serialHi = key.serialHi; 66 | ibd = i; 67 | } 68 | }; 69 | 70 | class IBDList 71 | { 72 | public: 73 | IBDPair * list; 74 | int size, count; 75 | 76 | IBDList(); 77 | ~IBDList(); 78 | 79 | IBD * Lookup(Person & p1, Person & p2); 80 | void Append(Person & p1, Person & p2, IBD & ibd); 81 | void Sort(Pedigree & ped); 82 | bool IsRangeEmpty(int low, int high); 83 | 84 | private: 85 | void Grow(); 86 | }; 87 | 88 | class IBDTable 89 | { 90 | public: 91 | IBDList * markers; 92 | 93 | IBDTable(); 94 | ~IBDTable(); 95 | 96 | void Load(Pedigree & ped, FILE * f); 97 | void Load(Pedigree & ped, const char * filename); 98 | void Load(Pedigree & ped, const char * filename, Vector & LocusMap); 99 | 100 | IBD * Lookup(int marker, Person & p1, Person & p2); 101 | 102 | bool HaveFamily(int marker, Family * f); 103 | 104 | bool isEmpty() 105 | { return markers == NULL; } 106 | }; 107 | 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /king/MathSobol.cpp: -------------------------------------------------------------------------------- 1 | #include "MathSobol.h" 2 | #include "Random.h" 3 | #include "Error.h" 4 | 5 | #include "stdlib.h" 6 | 7 | int SobolSequence::poly_degrees[POLY_COUNT] = 8 | { 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5, 5, 9 | 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 10 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 11 | }; 12 | 13 | int SobolSequence::poly_integers[POLY_COUNT] = 14 | { 0, 1, 1, 2, 1, 4, 2, 4, 7, 11, 13, 14, 15 | 1, 13, 16, 19, 22, 25, 1, 4, 7, 8, 14, 19, 16 | 21, 28, 31, 32, 37, 
41, 42, 50, 55, 56, 59, 62 17 | }; 18 | 19 | SobolSequence::SobolSequence() 20 | { 21 | bits = NULL; 22 | } 23 | 24 | SobolSequence::~SobolSequence() 25 | { 26 | if (bits != NULL) delete [] bits; 27 | } 28 | 29 | void SobolSequence::Init(int dimensions) 30 | { 31 | if (dimensions > POLY_COUNT) 32 | numerror("Sobol sequences of > %d dimensions not supported", POLY_COUNT); 33 | 34 | x.Dimension(dim = dimensions); 35 | x.Set(0); 36 | bits = new IntArray[SOBOL_BITS]; 37 | 38 | for (int i = 0; i < SOBOL_BITS; i++) 39 | bits[i].Dimension(dim); 40 | 41 | unsigned long seed = 0; 42 | 43 | for (int k = 0; k < dim; k++) 44 | { 45 | int degrees = poly_degrees[k]; 46 | 47 | for (int j = 0; j < degrees; j++) 48 | // initialize the 0 to kth bit as random odd number <= 2^j - 1 49 | // and apply a left shift by SOBOL_BITS - j - 1 50 | { 51 | bits[j][k] = (RAND(seed) % (1 << j) * 2 | 1); 52 | bits[j][k] <<= (SOBOL_BITS - j - 1); 53 | } 54 | 55 | for (int j = degrees; j < SOBOL_BITS; j++) 56 | // Fill in the remaining values using recurrence 57 | { 58 | long poly = poly_integers[k]; 59 | 60 | long i = bits[j - degrees][k]; 61 | i ^= (i >> poly_degrees[k]); 62 | 63 | for (int l = j - degrees + 1; l < j; l++) 64 | { 65 | if (poly & 1) i ^= bits[l][k]; 66 | poly >>= 1; 67 | } 68 | 69 | bits[j][k] = i; 70 | } 71 | } 72 | counter = 0; 73 | } 74 | 75 | Vector & SobolSequence::Next(Vector & point) 76 | { 77 | long i = counter, bit; 78 | 79 | for (bit = 0; bit < SOBOL_BITS; bit++) 80 | { 81 | if (!(i & 1)) break; 82 | i >>= 1; 83 | } 84 | 85 | if (bit == SOBOL_BITS) numerror("SobolSequence is too short"); 86 | 87 | for (int k = 0; k < dim; k++) 88 | { 89 | x[k] ^= bits[bit][k]; 90 | point[k] = x[k] * SOBOL_FACTOR; 91 | } 92 | 93 | counter++; 94 | 95 | return point; 96 | } 97 | 98 | 99 | -------------------------------------------------------------------------------- /scripts/run-paste-genotype-local.cmd: -------------------------------------------------------------------------------- 1 | # 
out : log/paste-geno 2 | # list : INTERVAL : index/intervals/b38.intervals.X.10Mb.1Mb.txt 3 | # var : ROOT : .. 4 | # var : PREFIX : $INTERVAL$1$/merged.$INTERVAL$1$_$INTERVAL$4$_$INTERVAL$5$ 5 | # name : example-paste-genotype 6 | # target : out/genotypes/merged/$PREFIX$.genotypes.bcf out/genotypes/merged/$PREFIX$.genotypes.bcf.csi out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf.csi out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf.csi out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf.csi 7 | mkdir -p out/genotypes/merged/$INTERVAL$1$/ 8 | mkdir -p out/genotypes/minDP0/$INTERVAL$1$/ 9 | mkdir -p out/genotypes/minDP10/$INTERVAL$1$/ 10 | mkdir -p out/genotypes/hgdp/$INTERVAL$1$/ 11 | cut -f 1,20 out/index/list.107.local.crams.vb_xy.index | tail -n +2 > out/genotypes/merged/$PREFIX$.sex_map.txt 12 | cat index/seq.batches.by.20.txt | xargs -I {} echo 'out/genotypes/batches/{}/b{}.$INTERVAL$1$_$INTERVAL$2$_$INTERVAL$3$.genotypes.bcf' > out/genotypes/merged/$PREFIX$.bcflist.txt 13 | $ROOT$/cramore/cramore vcf-paste-calls --vcf-list out/genotypes/merged/$PREFIX$.bcflist.txt --num-pc 0 --sex-map out/genotypes/merged/$PREFIX$.sex_map.txt --xLabel chrX --yLabel chrY --mtLabel chrM --xStart 2781479 --xStop 155701383 --skip-tmp-info --region $INTERVAL$1$:$INTERVAL$4$-$INTERVAL$5$ --out out/genotypes/merged/$PREFIX$.genotypes.bcf > out/genotypes/merged/$PREFIX$.genotypes.bcf.out 2> out/genotypes/merged/$PREFIX$.genotypes.bcf.err 14 | $ROOT$/bcftools/bcftools index -f out/genotypes/merged/$PREFIX$.genotypes.bcf 15 | $ROOT$/cramore/cramore vcf-squeeze --in out/genotypes/merged/$PREFIX$.genotypes.bcf --minDP 0 --out out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf > out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf.out 2> out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf.err 16 | $ROOT$/bcftools/bcftools index -f 
out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf 17 | $ROOT$/cramore/cramore vcf-squeeze --in out/genotypes/merged/$PREFIX$.genotypes.bcf --minDP 10 --out out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf > out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf.out 2> out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf.err 18 | $ROOT$/bcftools/bcftools index -f out/genotypes/minDP10/$PREFIX$.gtonly.minDP10.bcf 19 | $ROOT$/cramore/cramore vcf-extract --vcf out/genotypes/minDP0/$PREFIX$.gtonly.minDP0.bcf --site resources/ref/HGDP_938.hg38.sites.vcf.gz --out out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf > out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf.out 2> out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf.err 20 | $ROOT$/bcftools/bcftools index -f out/genotypes/hgdp/$PREFIX$.gtonly.minDP0.hgdp.bcf 21 | -------------------------------------------------------------------------------- /king/PeelerNodes.cpp: -------------------------------------------------------------------------------- 1 | #include "PeelerNodes.h" 2 | 3 | Vector PeelerNode::scratch; 4 | 5 | PeelerNode::~PeelerNode() 6 | { 7 | } 8 | 9 | void PersonNode::PeelAncestors(MatingNode * mating, double (* trans)(int, int, int)) 10 | { 11 | for (int j = 0; j < states.Length(); j++) 12 | { 13 | double p = 0.0; 14 | 15 | for (int i = 0; i < mating->mstates.Length(); i++) 16 | p += trans(mating->father->states[mating->pstates[i]], 17 | mating->mother->states[mating->mstates[i]], 18 | states[j]) * mating->probabilities[i]; 19 | 20 | probabilities[j] *= p; 21 | } 22 | } 23 | 24 | void PersonNode::PeelDescendants(MatingNode * mating, double (* trans)(int, int, int)) 25 | { 26 | scratch.Dimension(states.Length()); 27 | scratch.Zero(); 28 | 29 | IntArray & index = person->sex == SEX_MALE ? 
mating->pstates : mating->mstates; 30 | 31 | for (int i = 0; i < index.Length(); i++) 32 | scratch[index[i]] += mating->probabilities[i]; 33 | 34 | for (int j = 0; j < states.Length(); j++) 35 | probabilities[j] *= scratch[j]; 36 | } 37 | // Empty this person's state list and the matching probabilities 38 | void PersonNode::Clear() 39 | { 40 | states.Clear(); 41 | probabilities.Clear(); 42 | } 43 | // Scale each mating probability by the father's probability at index pstates[j] 44 | void MatingNode::PeelFather() 45 | { 46 | for (int j = 0; j < pstates.Length(); j++) 47 | probabilities[j] *= father->probabilities[pstates[j]]; 48 | } 49 | // Scale each mating probability by the mother's probability at index mstates[j] 50 | void MatingNode::PeelMother() 51 | { 52 | for (int j = 0; j < mstates.Length(); j++) 53 | probabilities[j] *= mother->probabilities[mstates[j]]; 54 | } 55 | // Weight each parental pair by the sum over child states of trans(...) * child probability; pairs whose sum is not positive are deleted (the loop runs last-to-first) 56 | void MatingNode::PeelOffspring(PersonNode * child, double (*trans) (int, int, int)) 57 | { 58 | for (int i = pstates.Length() - 1; i >= 0; i--) 59 | { 60 | double p = 0.0; 61 | 62 | for (int j = 0; j < child->states.Length(); j++) 63 | p += trans(pstates[i], mstates[i], child->states[j]) * 64 | child->probabilities[j]; 65 | 66 | if (p > 0.0) 67 | probabilities[i] *= p; 68 | else 69 | mstates.Delete(i), 70 | pstates.Delete(i), 71 | probabilities.Delete(i); 72 | } 73 | } 74 | // Size the pair arrays to (father states x mother states) and set every probability to 1.0 75 | void MatingNode::Initialize(PersonNode * father, PersonNode * mother) 76 | { 77 | mstates.Dimension(father->states.Length() * mother->states.Length()); 78 | pstates.Dimension(mstates.Length()); 79 | 80 | probabilities.Dimension(mstates.Length()); 81 | probabilities.Set(1.0); 82 | // NOTE(review): the loop below writes pstates only at [0, father->states.Length()) and mstates only at [0, mother->states.Length()), not at one slot per (i, j) pair, so the remaining slots are never assigned here -- confirm the intended fill order, and whether slots should hold state values or indices into states 83 | for (int i = 0; i < father->states.Length(); i++) 84 | for (int j = 0; j < mother->states.Length(); j++) 85 | pstates[i] = father->states[i], 86 | mstates[j] = mother->states[j]; 87 | } 88 | 89 | 90 | -------------------------------------------------------------------------------- /libsvm/tools/easy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | from subprocess import * 6 | 7 | if len(sys.argv) <= 1: 8 | print('Usage: {0} training_file
[testing_file]'.format(sys.argv[0])) 9 | raise SystemExit 10 | 11 | # svm, grid, and gnuplot executable files 12 | 13 | is_win32 = (sys.platform == 'win32') 14 | if not is_win32: 15 | svmscale_exe = "../svm-scale" 16 | svmtrain_exe = "../svm-train" 17 | svmpredict_exe = "../svm-predict" 18 | grid_py = "./grid.py" 19 | gnuplot_exe = "/usr/bin/gnuplot" 20 | else: 21 | # example for windows 22 | svmscale_exe = r"..\windows\svm-scale.exe" 23 | svmtrain_exe = r"..\windows\svm-train.exe" 24 | svmpredict_exe = r"..\windows\svm-predict.exe" 25 | gnuplot_exe = r"c:\tmp\gnuplot\bin\pgnuplot.exe" 26 | grid_py = r".\grid.py" 27 | 28 | assert os.path.exists(svmscale_exe),"svm-scale executable not found" 29 | assert os.path.exists(svmtrain_exe),"svm-train executable not found" 30 | assert os.path.exists(svmpredict_exe),"svm-predict executable not found" 31 | assert os.path.exists(gnuplot_exe),"gnuplot executable not found" 32 | assert os.path.exists(grid_py),"grid.py not found" 33 | 34 | train_pathname = sys.argv[1] 35 | assert os.path.exists(train_pathname),"training file not found" 36 | file_name = os.path.split(train_pathname)[1] 37 | scaled_file = file_name + ".scale" 38 | model_file = file_name + ".model" 39 | range_file = file_name + ".range" 40 | 41 | if len(sys.argv) > 2: 42 | test_pathname = sys.argv[2] 43 | file_name = os.path.split(test_pathname)[1] 44 | assert os.path.exists(test_pathname),"testing file not found" 45 | scaled_test_file = file_name + ".scale" 46 | predict_test_file = file_name + ".predict" 47 | 48 | cmd = '{0} -s "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, train_pathname, scaled_file) 49 | print('Scaling training data...') 50 | Popen(cmd, shell = True, stdout = PIPE).communicate() 51 | 52 | cmd = '{0} -svmtrain "{1}" -gnuplot "{2}" "{3}"'.format(grid_py, svmtrain_exe, gnuplot_exe, scaled_file) 53 | print('Cross validation...') 54 | f = Popen(cmd, shell = True, stdout = PIPE).stdout 55 | 56 | line = '' 57 | while True: 58 | last_line = line 
59 | line = f.readline() 60 | if not line: break 61 | c,g,rate = map(float,last_line.split()) 62 | 63 | print('Best c={0}, g={1} CV rate={2}'.format(c,g,rate)) 64 | 65 | cmd = '{0} -c {1} -g {2} "{3}" "{4}"'.format(svmtrain_exe,c,g,scaled_file,model_file) 66 | print('Training...') 67 | Popen(cmd, shell = True, stdout = PIPE).communicate() 68 | 69 | print('Output model: {0}'.format(model_file)) 70 | if len(sys.argv) > 2: 71 | cmd = '{0} -r "{1}" "{2}" > "{3}"'.format(svmscale_exe, range_file, test_pathname, scaled_test_file) 72 | print('Scaling testing data...') 73 | Popen(cmd, shell = True, stdout = PIPE).communicate() 74 | 75 | cmd = '{0} "{1}" "{2}" "{3}"'.format(svmpredict_exe, scaled_test_file, model_file, predict_test_file) 76 | print('Testing...') 77 | Popen(cmd, shell = True).communicate() 78 | 79 | print('Output prediction: {0}'.format(predict_test_file)) 80 | -------------------------------------------------------------------------------- /libsvm/tools/checkdata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # 4 | # A format checker for LIBSVM 5 | # 6 | 7 | # 8 | # Copyright (c) 2007, Rong-En Fan 9 | # 10 | # All rights reserved. 11 | # 12 | # This program is distributed under the same license of the LIBSVM package. 
13 | # 14 | 15 | from sys import argv, exit 16 | import os.path 17 | 18 | def err(line_no, msg): 19 | print("line {0}: {1}".format(line_no, msg)) 20 | 21 | # works like float() but does not accept nan and inf 22 | def my_float(x): 23 | if x.lower().find("nan") != -1 or x.lower().find("inf") != -1: 24 | raise ValueError 25 | 26 | return float(x) 27 | 28 | def main(): 29 | if len(argv) != 2: 30 | print("Usage: {0} dataset".format(argv[0])) 31 | exit(1) 32 | 33 | dataset = argv[1] 34 | 35 | if not os.path.exists(dataset): 36 | print("dataset {0} not found".format(dataset)) 37 | exit(1) 38 | 39 | line_no = 1 40 | error_line_count = 0 41 | for line in open(dataset, 'r'): 42 | line_error = False 43 | 44 | # each line must end with a newline character 45 | if line[-1] != '\n': 46 | err(line_no, "missing a newline character in the end") 47 | line_error = True 48 | 49 | nodes = line.split() 50 | 51 | # check label 52 | try: 53 | label = nodes.pop(0) 54 | 55 | if label.find(',') != -1: 56 | # multi-label format 57 | try: 58 | for l in label.split(','): 59 | l = my_float(l) 60 | except: 61 | err(line_no, "label {0} is not a valid multi-label form".format(label)) 62 | line_error = True 63 | else: 64 | try: 65 | label = my_float(label) 66 | except: 67 | err(line_no, "label {0} is not a number".format(label)) 68 | line_error = True 69 | except: 70 | err(line_no, "missing label, perhaps an empty line?") 71 | line_error = True 72 | 73 | # check features 74 | prev_index = -1 75 | for i in range(len(nodes)): 76 | try: 77 | (index, value) = nodes[i].split(':') 78 | 79 | index = int(index) 80 | value = my_float(value) 81 | 82 | # precomputed kernel's index starts from 0 and LIBSVM 83 | # checks it. Hence, don't treat index 0 as an error. 
84 | if index < 0: 85 | err(line_no, "feature index must be positive; wrong feature {0}".format(nodes[i])) 86 | line_error = True 87 | elif index < prev_index: 88 | err(line_no, "feature indices must be in an ascending order, previous/current features {0} {1}".format(nodes[i-1], nodes[i])) 89 | line_error = True 90 | prev_index = index 91 | except: 92 | err(line_no, "feature '{0}' not an : pair, integer, real number ".format(nodes[i])) 93 | line_error = True 94 | 95 | line_no += 1 96 | 97 | if line_error: 98 | error_line_count += 1 99 | 100 | if error_line_count > 0: 101 | print("Found {0} lines with error.".format(error_line_count)) 102 | return 1 103 | else: 104 | print("No error.") 105 | return 0 106 | 107 | if __name__ == "__main__": 108 | exit(main()) 109 | -------------------------------------------------------------------------------- /king/TraitTransformations.cpp: -------------------------------------------------------------------------------- 1 | #include "TraitTransformations.h" 2 | #include "QuickIndex.h" 3 | #include "MathStats.h" 4 | 5 | void InverseNormalTransform(Pedigree & ped) 6 | { 7 | Vector phenotypes; 8 | IntArray individuals; 9 | QuickIndex index; 10 | 11 | phenotypes.Dimension(ped.count); 12 | individuals.Dimension(ped.count); 13 | 14 | for (int trait = 0; trait < ped.traitCount; trait++) 15 | { 16 | phenotypes.Dimension(0); 17 | individuals.Dimension(0); 18 | 19 | for (int i = 0; i < ped.count; i++) 20 | if (ped[i].traits[trait] != _NAN_) 21 | { 22 | phenotypes.Push(ped[i].traits[trait]); 23 | individuals.Push(i); 24 | } 25 | 26 | int count = individuals.Length(); 27 | 28 | if (count == 0) continue; 29 | 30 | index.Index(phenotypes); 31 | 32 | double scale = 1.0 / count; 33 | 34 | for (int i = 0, j; i < index.Length(); i++) 35 | { 36 | for (j = i; j + 1 < index.Length(); j++) 37 | if (ped[individuals[index[i]]].traits[trait] != 38 | ped[individuals[index[j]]].traits[trait] ) 39 | break; 40 | 41 | if (ped[individuals[index[i]]].traits[trait] 
!= 42 | ped[individuals[index[j]]].traits[trait] ) 43 | j--; 44 | 45 | double z = ninv(((i + j) * 0.5 + 0.5) * scale); 46 | 47 | for (int k = i; k <= j; k++) 48 | ped[individuals[index[k]]].traits[trait] = z; 49 | 50 | i = j; 51 | } 52 | } 53 | } 54 | 55 | void InverseNormalTransform(Pedigree & ped, int trait) 56 | { 57 | Vector phenotypes; 58 | IntArray individuals; 59 | QuickIndex index; 60 | 61 | phenotypes.Dimension(ped.count); 62 | phenotypes.Dimension(0); 63 | 64 | individuals.Dimension(ped.count); 65 | individuals.Dimension(0); 66 | 67 | for (int i = 0; i < ped.count; i++) 68 | if (ped[i].traits[trait] != _NAN_) 69 | { 70 | phenotypes.Push(ped[i].traits[trait]); 71 | individuals.Push(i); 72 | } 73 | 74 | int count = individuals.Length(); 75 | 76 | if (count == 0) return; 77 | 78 | index.Index(phenotypes); 79 | 80 | double scale = 1.0 / count; 81 | 82 | for (int i = 0, j; i < index.Length(); i++) 83 | { 84 | for (j = i; j + 1 < index.Length(); j++) 85 | if (ped[individuals[index[i]]].traits[trait] != 86 | ped[individuals[index[j]]].traits[trait] ) 87 | break; 88 | 89 | if (ped[individuals[index[i]]].traits[trait] != 90 | ped[individuals[index[j]]].traits[trait] ) 91 | j--; 92 | 93 | double z = ninv(((i + j) * 0.5 + 0.5) * scale); 94 | 95 | for (int k = i; k <= j; k++) 96 | ped[individuals[index[k]]].traits[trait] = z; 97 | 98 | i = j; 99 | } 100 | } 101 | 102 | 103 | 104 | -------------------------------------------------------------------------------- /king/LongArray.cpp: -------------------------------------------------------------------------------- 1 | #include "LongArray.h" 2 | #include "Hash.h" 3 | #include "Sort.h" 4 | 5 | #include <string.h> 6 | 7 | int LongArray::alloc = 4; 8 | 9 | LongArray::LongArray(int start_size) 10 | { 11 | count = start_size; 12 | size = (count + alloc) / alloc * alloc; 13 | items = new longint [size]; 14 | } 15 | 16 | LongArray::LongArray(LongArray & source) 17 | { 18 | count = source.count; 19 | size = source.size; 20 | items = new
longint [size]; 21 | 22 | for (int i = 0; i < count; i++) 23 | items[i] = source.items[i]; 24 | } 25 | 26 | LongArray::~LongArray() 27 | { 28 | delete [] items; 29 | } 30 | 31 | void LongArray::Grow(int new_size) 32 | { 33 | if (new_size > size) 34 | { 35 | if ((new_size >> 1) >= size) 36 | size = (new_size + alloc) / alloc * alloc; 37 | else 38 | { 39 | size = alloc; 40 | while (size <= new_size) 41 | size *= 2; 42 | } 43 | 44 | longint * new_items = new longint [size]; 45 | for (int i = 0; i < count; i++) 46 | new_items[i] = items[i]; 47 | delete [] items; 48 | items = new_items; 49 | } 50 | } 51 | 52 | int LongArray::Append(longint value) 53 | { 54 | Grow(count + 1); 55 | items[count++] = value; 56 | return count; 57 | } 58 | 59 | void LongArray::Set(longint value) 60 | { 61 | for (int i = 0; i < count; i++) 62 | items[i] = value; 63 | } 64 | 65 | int LongArray::Delete(int index) 66 | { 67 | count--; 68 | if (count - index) 69 | memmove(items + index, items + index + 1, sizeof(longint) * (count - index)); 70 | return count; 71 | } 72 | 73 | void LongArray::InsertAt(int index, longint value) 74 | { 75 | Grow(count + 1); 76 | memmove(items + index + 1, items + index, sizeof(longint) * (count - index)); 77 | items[index] = value; 78 | count++; 79 | } 80 | 81 | LongArray & LongArray::operator = (const LongArray & rhs) 82 | { 83 | Grow(rhs.count); 84 | count = rhs.count; 85 | for (int i = 0; i < count; i++) 86 | items[i] = rhs.items[i]; 87 | return *this; 88 | } 89 | 90 | int LongArray::Find(longint value) const 91 | { 92 | for (int i = 0; i < count; i++) 93 | if (value == items[i]) 94 | return i; 95 | return -1; 96 | } 97 | 98 | void LongArray::Zero() 99 | { 100 | for (int i = 0; i < count; i++) 101 | items[i] = 0; 102 | } 103 | 104 | void LongArray::Reverse() 105 | { 106 | for (int i = 0, j = count - 1; i < j; i++, j--) 107 | Swap(i, j); 108 | } 109 | 110 | bool LongArray::operator == (const LongArray & rhs) const 111 | { 112 | if (count != rhs.count) 113 | return 
false; 114 | 115 | for (int i = 0; i < rhs.count; i++) 116 | if (items[i] != rhs.items[i]) 117 | return false; 118 | 119 | return true; 120 | } 121 | 122 | bool LongArray::operator != (const LongArray & rhs) const 123 | { 124 | return !(*this == rhs); 125 | } 126 | 127 | int LongArray::Hash(int initval) 128 | { 129 | return hash((unsigned char *) items, sizeof(longint) * count, initval); 130 | } 131 | -------------------------------------------------------------------------------- /king/FortranFormat.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // libsrc/FortranFormat.h 3 | // (c) 2000-2007 Goncalo Abecasis 4 | // 5 | // This file is distributed as part of the MERLIN source code package 6 | // and may not be redistributed in any form, without prior written 7 | // permission from the author. Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 
14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #ifndef __FORTRAN_FORMAT__ 19 | #define __FORTRAN_FORMAT__ 20 | 21 | #include "StringBasics.h" 22 | #include "IntArray.h" 23 | 24 | class FortranFormat 25 | { 26 | public: 27 | // This class reads a user specified input file, one line at a time, 28 | // and returns individual fields according to a user specified format 29 | // statement 30 | FortranFormat(); 31 | 32 | // Set the fortran format statement 33 | void SetFormat(const String & formatString); 34 | 35 | // Set the input file 36 | void SetInputFile(IFILE & file); 37 | 38 | // Read one field from input file 39 | void GetNextField(String & field); 40 | int GetNextInteger(); 41 | char GetNextCharacter(); 42 | 43 | // Process a token in format statement and return true 44 | // if token corresponds to input field. Return false if 45 | // token led to processing of white-space or input line 46 | // positioning 47 | bool ProcessToken(String & field); 48 | 49 | // Flush the pattern -- this finishes processing the current 50 | // pattern and ensures that all trailing new-lines, etc. 
are 51 | // handled correctly 52 | void Flush(); 53 | 54 | private: 55 | // The input line and current position along it 56 | String inputLine; 57 | int inputPos; 58 | 59 | // The Fortran format statement and current position along it 60 | String format; 61 | int formatPos; 62 | 63 | // The position of the pattern we are repeating, if any 64 | int repeatCount; 65 | 66 | // Returns an integer from the current format statement, if any 67 | int GetIntegerFromFormat(); 68 | 69 | // These functions check the next character in format string 70 | bool DigitFollows(); 71 | bool CharacterFollows(); 72 | 73 | // This function finishes the input field 74 | void FinishField(bool haveSlash = false); 75 | 76 | // Reject width where appropriate 77 | void RejectWidth(char type); 78 | 79 | // The input file 80 | IFILE input; 81 | 82 | // Stacks to keep track of nested parentheses 83 | IntArray bracketStack; 84 | IntArray bracketCount; 85 | IntArray bracketCounter; 86 | 87 | int lastBracket; 88 | int lastCount; 89 | 90 | // Buffer for reading fields 91 | String buffer; 92 | 93 | // Flag that indicates whether we have reached end-of-pattern 94 | bool endOfPattern; 95 | }; 96 | 97 | #endif 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /king/PedigreePerson.h: -------------------------------------------------------------------------------- 1 | #ifndef __PEDPERSON_H__ 2 | #define __PEDPERSON_H__ 3 | 4 | #include "Constant.h" 5 | #include "PedigreeAlleles.h" 6 | #include "PedigreeGlobals.h" 7 | #include "StringArray.h" 8 | #include "IntArray.h" 9 | 10 | #define SEX_MALE 1 11 | #define SEX_FEMALE 2 12 | #define SEX_UNKNOWN 0 13 | 14 | class Person : public PedigreeGlobals 15 | { 16 | public: 17 | String famid; 18 | String pid; 19 | String motid; 20 | String fatid; 21 | int sex; 22 | int zygosity; 23 | int serial, traverse; 24 | 25 | Alleles * markers; 26 | // double * traits; 27 | Vector traits; 28 | char * affections; 29 | // double *
covariates; 30 | Vector covariates; 31 | 32 | Person * father; 33 | Person * mother; 34 | 35 | int sibCount; 36 | Person ** sibs; 37 | 38 | int ngeno; 39 | 40 | bool filter; 41 | 42 | Person(); 43 | ~Person(); 44 | 45 | bool isHalfSib(Person & sib) 46 | { 47 | return hasBothParents && 48 | ((sib.father == father) ^ (sib.mother == mother)); 49 | } 50 | 51 | bool isSib(Person & sib) 52 | { 53 | return hasBothParents && 54 | (sib.father == father) && (sib.mother == mother); 55 | } 56 | 57 | bool isTwin(Person & twin) 58 | { 59 | return (zygosity != 0) && (zygosity == twin.zygosity) && isSib(twin); 60 | } 61 | 62 | bool isMzTwin(Person & mzTwin) 63 | { 64 | return (zygosity & 1) && (zygosity == mzTwin.zygosity) && isSib(mzTwin); 65 | } 66 | 67 | // Check that both parents or none are available 68 | // Verify that fathers are male and mothers are female 69 | bool CheckParents(); 70 | 71 | // Assess status before using quick diagnostics functions 72 | void AssessStatus(); 73 | 74 | // Quick diagnostics 75 | bool isFounder() 76 | { return !hasBothParents; } 77 | bool isSexed() 78 | { return sex != 0; } 79 | bool isGenotyped(int m) 80 | { return markers[m].isKnown(); } 81 | bool isFullyGenotyped() 82 | { return ngeno == markerCount; } 83 | bool isControlled(int c) 84 | { return covariates[c] != _NAN_; } 85 | bool isFullyControlled() 86 | { return hasAllCovariates; } 87 | bool isPhenotyped(int t) 88 | { return traits[t] != _NAN_; } 89 | bool isFullyPhenotyped() 90 | { return hasAllTraits; } 91 | bool isDiagnosed(int a) 92 | { return affections[a] != 0; } 93 | bool isFullyDiagnosed() 94 | { return hasAllAffections; } 95 | bool haveData(); 96 | bool isAncestor(Person * descendant); 97 | 98 | int GenotypedMarkers(); 99 | 100 | static void Order(Person * & p1, Person * & p2); 101 | 102 | void Copy(Person & rhs); 103 | void CopyIDs(Person & rhs); 104 | void CopyPhenotypes(Person & rhs); 105 | void WipePhenotypes(bool remove_genotypes = true); 106 | 107 | private: 108 | 109 | 
bool hasAllCovariates, hasAllTraits, 110 | hasAllAffections, hasBothParents; 111 | }; 112 | 113 | #endif 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /singularity.def: -------------------------------------------------------------------------------- 1 | Bootstrap: library 2 | From: ubuntu:16.04 3 | 4 | %files 5 | .git /topmed_variant_calling/.git 6 | .gitmodules /topmed_variant_calling/.gitmodules 7 | apigenome /topmed_variant_calling/apigenome 8 | bamUtil /topmed_variant_calling/bamUtil 9 | bcftools /topmed_variant_calling/bcftools 10 | cramore /topmed_variant_calling/cramore 11 | htslib /topmed_variant_calling/htslib 12 | invNorm /topmed_variant_calling/invNorm 13 | king /topmed_variant_calling/king 14 | libStatGen /topmed_variant_calling/libStatGen 15 | libsvm /topmed_variant_calling/libsvm 16 | samtools /topmed_variant_calling/samtools 17 | scripts /topmed_variant_calling/scripts 18 | vt-topmed /topmed_variant_calling/vt-topmed 19 | 20 | %environment 21 | export LC_ALL=C 22 | 23 | %post 24 | set -eu 25 | 26 | apt-get update && apt-get install -y \ 27 | apt-utils \ 28 | automake \ 29 | autoconf \ 30 | build-essential \ 31 | cmake \ 32 | default-jre \ 33 | default-jdk \ 34 | gdb \ 35 | git \ 36 | ghostscript \ 37 | gnuplot \ 38 | groff \ 39 | libcurl4-gnutls-dev \ 40 | liblzma-dev \ 41 | libncurses5-dev \ 42 | libssl-dev \ 43 | libzstd-dev \ 44 | python3 \ 45 | r-base \ 46 | wget \ 47 | zlib1g-dev 48 | 49 | cd /topmed_variant_calling 50 | 51 | git submodule init 52 | git submodule update 53 | 54 | git clone https://github.com/samtools/htslib htslib-1.13 55 | cd htslib-1.13 56 | git checkout 1.13 57 | git submodule update --init --recursive 58 | autoreconf -i 59 | ./configure --disable-libcurl 60 | make 61 | cd .. 
62 | 63 | mkdir plink 64 | cd plink/ 65 | wget http://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20190617.zip 66 | unzip plink_linux_x86_64_20190617.zip 67 | rm plink_linux_x86_64_20190617.zip 68 | install -T plink /usr/local/bin/plink-1.9 69 | cd .. 70 | 71 | wget https://sourceforge.net/projects/snpeff/files/snpEff_v4_3t_core.zip/download 72 | unzip download 73 | rm download 74 | rm -r clinEff/ 75 | 76 | cd libsvm/ 77 | make clean 78 | make 79 | cd .. 80 | 81 | cd apigenome 82 | git clean -fdx 83 | autoreconf -vfi 84 | ./configure --prefix $PWD 85 | make 86 | make install 87 | cd .. 88 | 89 | cd libStatGen 90 | git clean -fdx 91 | make 92 | cd .. 93 | 94 | cd bamUtil 95 | git clean -fdx 96 | make 97 | cd .. 98 | 99 | cd invNorm 100 | git clean -fdx 101 | make 102 | cd .. 103 | 104 | cd htslib 105 | git clean -fdx 106 | autoheader 107 | autoconf 108 | ./configure --disable-libcurl 109 | make 110 | install bgzip /usr/local/bin 111 | install tabix /usr/local/bin 112 | cd .. 113 | 114 | cd vt-topmed 115 | git clean -fdx 116 | make 117 | cd .. 118 | 119 | cd cramore 120 | git clean -fdx 121 | mkdir build 122 | cd build 123 | cmake -DCMAKE_BUILD_TYPE=Release -DHTS_INCLUDE_DIRS=/topmed_variant_calling/htslib-1.13 -DHTS_LIBRARIES=/topmed_variant_calling/htslib-1.13/libhts.a .. 124 | make 125 | cd ../.. 126 | 127 | cd samtools 128 | git clean -fdx 129 | autoheader 130 | autoconf -Wno-syntax 131 | ./configure 132 | make 133 | make install 134 | cd .. 135 | 136 | cd bcftools 137 | git clean -fdx 138 | make 139 | make install 140 | cd .. 141 | 142 | cd king 143 | rm -f king *.o 144 | g++ -O3 -fopenmp -o king *.cpp -lm -lz 145 | cd .. 
146 | 147 | -------------------------------------------------------------------------------- /scripts/e05-whitelist-gwas-variants.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | 5 | my $chr = $ARGV[0]; 6 | 7 | my %hw = (); 8 | my $nw = 0; 9 | open(IN,"resources/gwascatalog.20200204.uniq.rsid.entries.tsv") || die "Cannot open file\n"; 10 | while(<IN>) { 11 | my ($build,$rsno,@F) = split; 12 | next unless ( $F[0] eq "$chr" ); 13 | my @alts = split(/,/,$F[4]); 14 | next unless ( $F[7] =~ /;CAF=/ ); 15 | my @cafs = ( $F[7] =~ /;CAF=([^;]+);/ ) ? split(/,/,$1) : (); ## ternary instead of 'my ... if': @cafs is always freshly assigned, empty when the pattern does not match 16 | my ($maxaf,$imax) = (0,0); 17 | for(my $i=1; $i < @cafs; ++$i) { 18 | if ( ( $cafs[$i] ne "." ) && ( $cafs[$i] > $maxaf ) ) { 19 | $imax = $i; 20 | $maxaf = $cafs[$i]; 21 | } 22 | } 23 | next if ( $imax == 0 ); 24 | $hw{"$F[1]:$F[3]:$alts[$imax-1]"} = 1; 25 | ++$nw; 26 | } 27 | close IN; 28 | 29 | print STDERR "Finished loading $nw variants to be whitelisted\n"; 30 | 31 | if ($nw == 0) { 32 | die("Error: no variants to be whitelisted\n"); 33 | } 34 | 35 | my $vcf = $ARGV[1]; #"release/sites/nowhite/freeze9.merged.chr$chr.filtered.anno.sites.vcf.gz"; 36 | my $outprefix = $ARGV[2]; #"release/sites/freeze9.merged.chr$chr.filtered.anno.gwas.sites"; 37 | my $vcfsummary2 = "$ENV{'EXE_PREFIX'}/apigenome/bin/vcf-summary-v2"; 38 | my $ref = "resources/ref/hs38DH.fa"; 39 | my $dbsnp = "resources/ref/dbsnp_142.b38.vcf.gz"; 40 | my @posVcfs = qw(resources/ref/hapmap_3.3.b38.sites.vcf.gz resources/ref/1000G_omni2.5.b38.sites.PASS.vcf.gz); 41 | 42 | open(VCF,"zcat $vcf |") || die "Cannot open file\n"; 43 | open(OUT1," | bgzip -c > $outprefix.vcf.gz") || die "Cannot open file\n"; 44 | open(OUT2, "| $vcfsummary2 --ref $ref --db $dbsnp --FNRvcf $posVcfs[0] --chr $chr --tabix tabix --bgzip bgzip > $outprefix.summary_v2") || die "Cannot open file\n"; 45 | 46 | my ($ngwasPass,$ngwasKeep,$ngwasSwitch) = (0,0,0); 47 | while(<VCF>) { 48 | if ( /^#/ ) { 49
| next if ( /^INFO=\n"; 57 | } 58 | } 59 | else { 60 | my @F = split(/[\t\r\n]/); 61 | $F[7] =~ s/;AF=.*;HWEAF_P=[^;]+;/;/; 62 | $F[7] =~ s/;AVG_IF=[^;]+;/;/; 63 | my $key = "$F[1]:$F[3]:$F[4]"; 64 | 65 | if ( defined($hw{$key}) ) { 66 | my $oldFilt = $F[6]; 67 | $oldFilt =~ s/;/,/g; ## oldFilt contains the old filters 68 | my $newFilt = "PASS"; ## newFilt should be PASS in most cases 69 | if ( $oldFilt eq "PASS" ) { ++$ngwasPass; } ## if already PASS, that is fine still pass 70 | elsif ( ( $oldFilt =~ /SVM/ ) || ( $oldFilt =~ /CEN/ ) || ( $oldFilt =~ /EXHET/ ) || ( $oldFilt =~ /DISC/ ) || ( $oldFilt =~ /CHRXHET/ ) ) { ## if failed by existing filters 71 | ++$ngwasKeep; ## keep the current filter 72 | $newFilt = $F[6]; 73 | } 74 | else { ## must be only MIS2,DUP2,TRI2 75 | ++$ngwasSwitch; 76 | } 77 | $F[7] .= ";GWAS=$oldFilt"; 78 | $F[6] = $newFilt; #"PASS"; 79 | print STDERR "$F[0]:$F[1]:$F[3]:$F[4] $ngwasPass $ngwasKeep $ngwasSwitch\n" if ( rand() < 0.01 ); 80 | } 81 | print OUT1 join("\t",@F)."\n"; 82 | print OUT2 join("\t",@F)."\n"; 83 | } 84 | } 85 | close OUT1; 86 | close OUT2; 87 | close VCF; 88 | 89 | print STDERR "Finished $ngwasPass $ngwasKeep $ngwasSwitch\n"; 90 | 91 | print `tabix -f -pvcf $outprefix.vcf.gz`; 92 | -------------------------------------------------------------------------------- /king/MiniDeflate.h: -------------------------------------------------------------------------------- 1 | #ifndef __MINIDEFLATE_H__ 2 | #define __MINIDEFLATE_H__ 3 | 4 | #include <stdio.h> 5 | 6 | // MiniDeflate reads and writes files in a simple Deflate like format 7 | // A quick overview of this format follows, at the bottom of this file 8 | // 9 | 10 | // Performance tuning constants 11 | // 12 | 13 | // Hash table size is HASH_SIZE (a prime) 14 | #define HASH_SIZE 4093 15 | // Hash table depth is HASH_DEPTH (a power of 2) 16 | #define HASH_DEPTH 8 17 | // Matches that are not at least OKAY_MATCH chars are added to hash table 18 | #define OKAY_MATCH 32 19 | // Buffer
size for FILE I/O 20 | #define BUFFER_SIZE (32 * 1024) 21 | 22 | class MiniDeflate 23 | { 24 | public: 25 | MiniDeflate(); 26 | ~MiniDeflate(); 27 | 28 | void Deflate(FILE * output, void * input, size_t bytes); 29 | void Inflate(FILE * input, void * ouput, size_t bytes); 30 | 31 | private: 32 | unsigned char * buffer; 33 | unsigned char * hash_keys; 34 | unsigned char ** hash_values; 35 | 36 | // Inline functions used during file compression 37 | inline void EvaluateMatch(unsigned char * in, int len, int hash, 38 | unsigned char * & best_pos, int & best_match); 39 | inline void QuoteLiterals(unsigned char * & in, int literal, 40 | unsigned char * & out, int & buffer_len, 41 | FILE * output); 42 | inline void OutputLiterals(unsigned char * & in, int literal, 43 | unsigned char * & out, int & buffer_len, 44 | FILE * output); 45 | inline void CiteLiteral(unsigned char * & out, int literal, 46 | unsigned char * & in, int & buffer_len, 47 | FILE * input); 48 | }; 49 | 50 | // Format specification for deflate files 51 | // 52 | // A compressed file is a sequence of bytes {0 .. N}. 53 | // Each byte is a sequence of bits [0 .. 7] with 0 as the Most Significant Bit. 
54 | // 55 | // The following tokens are recognized: 56 | // 57 | // Literal quotes -- refer to unique strings 58 | // 59 | // BYTE0 BYTE1 BYTE2 Description 60 | // 0 HI LO Quote of 31 bytes or more 61 | // Followed by (HI << 8 + LO + 31) quoted chars 62 | // 0:4|LEN Quote of up to 1-15 bytes 63 | // Followed by LEN quoted chars 64 | // 65 | // String matches -- refer to previous strings in the input stream 66 | // 67 | // BYTE0 BYTE1 BYTE2 BYTE3 BYTE4 Description 68 | // 1:4|OFF OFF1 OFF2:2|0 HI LO Long match of > 66 bytes 69 | // Offset of OFF|OFF1|OFF2 + 1 70 | // Length of HI|LO + 66 71 | // 1:4|OFF OFF1 OFF2:2|LEN Distant match of < 66 bytes 72 | // Offset of OFF|OFF1|OFF2 + 1 73 | // Length of LEN + 2 74 | // LEN|OFF OFF1 Nearby short match 75 | // Offset OFF|OFF1 + 1 76 | // Length LEN 77 | // 78 | 79 | // NOTE: When partitioning bytes, I use the notation X:n|Y so that 80 | // X takes the n MSB bits of byte and Y takes the remaining bits. 81 | 82 | 83 | #endif 84 | 85 | 86 | -------------------------------------------------------------------------------- /libsvm/svm.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIBSVM_H 2 | #define _LIBSVM_H 3 | 4 | #define LIBSVM_VERSION 310 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | extern int libsvm_version; 11 | 12 | struct svm_node 13 | { 14 | int index; 15 | double value; 16 | }; 17 | 18 | struct svm_problem 19 | { 20 | int l; 21 | double *y; 22 | struct svm_node **x; 23 | }; 24 | 25 | enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */ 26 | enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */ 27 | 28 | struct svm_parameter 29 | { 30 | int svm_type; 31 | int kernel_type; 32 | int degree; /* for poly */ 33 | double gamma; /* for poly/rbf/sigmoid */ 34 | double coef0; /* for poly/sigmoid */ 35 | 36 | /* these are for training only */ 37 | double cache_size; /* in MB */ 38 | double eps; /* stopping criteria */ 39 |
double C; /* for C_SVC, EPSILON_SVR and NU_SVR */ 40 | int nr_weight; /* for C_SVC */ 41 | int *weight_label; /* for C_SVC */ 42 | double* weight; /* for C_SVC */ 43 | double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */ 44 | double p; /* for EPSILON_SVR */ 45 | int shrinking; /* use the shrinking heuristics */ 46 | int probability; /* do probability estimates */ 47 | }; 48 | 49 | // 50 | // svm_model 51 | // 52 | struct svm_model 53 | { 54 | struct svm_parameter param; /* parameter */ 55 | int nr_class; /* number of classes, = 2 in regression/one class svm */ 56 | int l; /* total #SV */ 57 | struct svm_node **SV; /* SVs (SV[l]) */ 58 | double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */ 59 | double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */ 60 | double *probA; /* pariwise probability information */ 61 | double *probB; 62 | 63 | /* for classification only */ 64 | 65 | int *label; /* label of each class (label[k]) */ 66 | int *nSV; /* number of SVs for each class (nSV[k]) */ 67 | /* nSV[0] + nSV[1] + ... 
+ nSV[k-1] = l */ 68 | /* XXX */ 69 | int free_sv; /* 1 if svm_model is created by svm_load_model*/ 70 | /* 0 if svm_model is created by svm_train */ 71 | }; 72 | 73 | struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param); 74 | void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target); 75 | 76 | int svm_save_model(const char *model_file_name, const struct svm_model *model); 77 | struct svm_model *svm_load_model(const char *model_file_name); 78 | 79 | int svm_get_svm_type(const struct svm_model *model); 80 | int svm_get_nr_class(const struct svm_model *model); 81 | void svm_get_labels(const struct svm_model *model, int *label); 82 | double svm_get_svr_probability(const struct svm_model *model); 83 | 84 | double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values); 85 | double svm_predict(const struct svm_model *model, const struct svm_node *x); 86 | double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates); 87 | 88 | void svm_free_model_content(struct svm_model *model_ptr); 89 | void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr); 90 | void svm_destroy_param(struct svm_parameter *param); 91 | 92 | const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param); 93 | int svm_check_probability_model(const struct svm_model *model); 94 | 95 | void svm_set_print_string_function(void (*print_func)(const char *)); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif /* _LIBSVM_H */ 102 | -------------------------------------------------------------------------------- /king/MemoryAllocators.cpp: -------------------------------------------------------------------------------- 1 | #include "MemoryAllocators.h" 2 | 3 | #include 4 | 5 | char *** AllocateCharCube(int n, int rows, int cols) 6 | { 7 | char *** cube = new 
char ** [n]; 8 | 9 | // Stop early if we are out of memory 10 | if (cube == NULL) 11 | return NULL; 12 | 13 | for (int i = 0; i < n; i++) 14 | { 15 | cube[i] = AllocateCharMatrix(rows, cols); 16 | 17 | // Safely unravel allocation if we run out of memory 18 | if (cube[i] == NULL) 19 | { 20 | while (i--) 21 | FreeCharMatrix(cube[i], rows); 22 | 23 | delete [] cube; 24 | 25 | return NULL; 26 | } 27 | } 28 | 29 | return cube; 30 | } 31 | 32 | int ** AllocateIntMatrix(int rows, int cols) 33 | { 34 | int ** matrix = new int * [rows]; 35 | 36 | // Stop early if we are out of memory 37 | if (matrix == NULL) 38 | return NULL; 39 | 40 | for (int i = 0; i < rows; i++) 41 | { 42 | matrix[i] = new int [cols]; 43 | 44 | // Safely unravel allocation if we run out of memory 45 | if (matrix[i] == NULL) 46 | { 47 | while (i--) 48 | delete [] matrix[i]; 49 | 50 | delete [] matrix; 51 | 52 | return NULL; 53 | } 54 | } 55 | 56 | return matrix; 57 | } 58 | 59 | char ** AllocateCharMatrix(int rows, int cols) 60 | { 61 | char ** matrix = new char * [rows]; 62 | 63 | // Stop early if we are out of memory 64 | if (matrix == NULL) 65 | return NULL; 66 | 67 | for (int i = 0; i < rows; i++) 68 | { 69 | matrix[i] = new char [cols]; 70 | 71 | // Safely unravel allocation if we run out of memory 72 | if (matrix[i] == NULL) 73 | { 74 | while (i--) 75 | delete [] matrix[i]; 76 | 77 | delete [] matrix; 78 | 79 | return NULL; 80 | } 81 | } 82 | 83 | return matrix; 84 | } 85 | 86 | float ** AllocateFloatMatrix(int rows, int cols) 87 | { 88 | float ** matrix = new float * [rows]; 89 | 90 | // Stop early if we are out of memory 91 | if (matrix == NULL) 92 | return NULL; 93 | 94 | for (int i = 0; i < rows; i++) 95 | { 96 | matrix[i] = new float [cols]; 97 | 98 | // Safely unravel allocation if we run out of memory 99 | if (matrix[i] == NULL) 100 | { 101 | while (i--) 102 | delete [] matrix[i]; 103 | 104 | delete [] matrix; 105 | 106 | return NULL; 107 | } 108 | } 109 | 110 | return matrix; 111 | } 112 | 
113 | void FreeCharCube(char *** & cube, int n, int rows) 114 | { 115 | for (int i = 0; i < n; i++) 116 | FreeCharMatrix(cube[i], rows); 117 | 118 | delete [] cube; 119 | 120 | cube = NULL; 121 | } 122 | 123 | void FreeCharMatrix(char ** & matrix, int rows) 124 | { 125 | for (int i = 0; i < rows; i++) 126 | delete [] matrix[i]; 127 | 128 | delete [] matrix; 129 | 130 | matrix = NULL; 131 | } 132 | 133 | void FreeFloatMatrix(float ** & matrix, int rows) 134 | { 135 | for (int i = 0; i < rows; i++) 136 | delete [] matrix[i]; 137 | 138 | delete [] matrix; 139 | 140 | matrix = NULL; 141 | } 142 | 143 | void FreeIntMatrix(int ** & matrix, int rows) 144 | { 145 | for (int i = 0; i < rows; i++) 146 | delete [] matrix[i]; 147 | 148 | delete [] matrix; 149 | 150 | matrix = NULL; 151 | } 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /king/Input.cpp: -------------------------------------------------------------------------------- 1 | #include "Input.h" 2 | #include "Error.h" 3 | #include "Constant.h" 4 | 5 | #include 6 | #include 7 | 8 | int InputPromptWidth = 25; 9 | 10 | void safe_gets(char * buffer, int n) 11 | { 12 | buffer[0] = 0; 13 | 14 | fgets(buffer, n, stdin); 15 | 16 | for (char * ptr = buffer; *ptr != 0; ptr++) 17 | if (*ptr == '\n') 18 | *ptr = 0; 19 | } 20 | 21 | void Input(const char * prompt, int & n, int _default) 22 | { 23 | char buffer[BUFSIZE]; 24 | 25 | int success; 26 | do { 27 | printf("%*s [%8d]: ", InputPromptWidth, prompt, _default); 28 | safe_gets(buffer, BUFSIZE); 29 | success = sscanf(buffer, "%d", &n); 30 | if (success == EOF) 31 | n = _default; 32 | } while (success == 0); 33 | } 34 | 35 | void Input(const char * prompt, char & ch, char _default) 36 | { 37 | char buffer[BUFSIZE]; 38 | 39 | int success; 40 | do { 41 | printf("%*s [%8c]: ", InputPromptWidth, prompt, _default); 42 | safe_gets(buffer, BUFSIZE); 43 | success = sscanf(buffer, "%c", &ch); 44 | if (success == EOF) 45 | ch = 
_default; 46 | } while (success == 0); 47 | } 48 | 49 | void Input(const char * prompt, double & d, double _default) 50 | { 51 | char buffer[BUFSIZE]; 52 | 53 | int success; 54 | do { 55 | printf("%*s [%8.2f]: ", InputPromptWidth, prompt, _default); 56 | safe_gets(buffer, BUFSIZE); 57 | success = sscanf(buffer, "%lf", &d); 58 | if (success == EOF) 59 | d = _default; 60 | } while (success == 0); 61 | } 62 | 63 | void Input(const char * prompt, bool & b, bool _default) 64 | { 65 | char buffer[BUFSIZE]; 66 | int success; 67 | char c; 68 | 69 | do { 70 | printf("%*s [%8s]: ", InputPromptWidth, prompt, _default ? "Y/n" : "y/N"); 71 | safe_gets(buffer, BUFSIZE); 72 | success = sscanf(buffer, "%c", &c); 73 | if (success == EOF) 74 | b = _default; 75 | else 76 | switch (c) 77 | { 78 | case 'y' : 79 | case 'Y' : 80 | b = true; 81 | break; 82 | case 'n' : 83 | case 'N' : 84 | b = false; 85 | break; 86 | default : 87 | success = 0; 88 | } 89 | } while (success == 0); 90 | } 91 | 92 | 93 | void Input(const char * prompt, char * s, char * _default) 94 | { 95 | char buffer[BUFSIZE]; 96 | 97 | int success; 98 | do { 99 | printf("%*s [%8s]: ", InputPromptWidth, prompt, _default); 100 | safe_gets(buffer, BUFSIZE); 101 | success = sscanf(buffer, " %[^\n]", s); 102 | if (success == EOF) 103 | strcpy(s, _default); 104 | } while (success == 0); 105 | } 106 | 107 | void InputBounds(const char * prompt, int & n, int min, int max, 108 | int _default) 109 | { 110 | Input(prompt, n, _default); 111 | while ((n < min) || (n > max)) 112 | { 113 | printf("\n*** Input value must be between %d and %d ***\n", min, max); 114 | Input(prompt, n, _default); 115 | } 116 | } 117 | 118 | void InputBounds(const char * prompt, double & d, double min, double max, 119 | double _default) 120 | { 121 | Input(prompt, d, _default); 122 | while ((d < min) || (d > max)) 123 | { 124 | printf("\n*** Input value must be between %.2f and %.2f ***\n", min, max); 125 | Input(prompt, d, _default); 126 | } 127 | } 128 | 
129 | 130 | -------------------------------------------------------------------------------- /libsvm/tools/subset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from sys import argv, exit, stdout, stderr 3 | from random import randint 4 | 5 | method = 0 6 | global n 7 | global dataset_filename 8 | subset_filename = "" 9 | rest_filename = "" 10 | 11 | def exit_with_help(): 12 | print("""\ 13 | Usage: {0} [options] dataset number [output1] [output2] 14 | 15 | This script selects a subset of the given dataset. 16 | 17 | options: 18 | -s method : method of selection (default 0) 19 | 0 -- stratified selection (classification only) 20 | 1 -- random selection 21 | 22 | output1 : the subset (optional) 23 | output2 : rest of the data (optional) 24 | If output1 is omitted, the subset will be printed on the screen.""".format(argv[0])) 25 | exit(1) 26 | 27 | def process_options(): 28 | global method, n 29 | global dataset_filename, subset_filename, rest_filename 30 | 31 | argc = len(argv) 32 | if argc < 3: 33 | exit_with_help() 34 | 35 | i = 1 36 | while i < len(argv): 37 | if argv[i][0] != "-": 38 | break 39 | if argv[i] == "-s": 40 | i = i + 1 41 | method = int(argv[i]) 42 | if method < 0 or method > 1: 43 | print("Unknown selection method {0}".format(method)) 44 | exit_with_help() 45 | i = i + 1 46 | 47 | dataset_filename = argv[i] 48 | n = int(argv[i+1]) 49 | if i+2 < argc: 50 | subset_filename = argv[i+2] 51 | if i+3 < argc: 52 | rest_filename = argv[i+3] 53 | 54 | def main(): 55 | class Label: 56 | def __init__(self, label, index, selected): 57 | self.label = label 58 | self.index = index 59 | self.selected = selected 60 | 61 | process_options() 62 | 63 | # get labels 64 | i = 0 65 | labels = [] 66 | f = open(dataset_filename, 'r') 67 | for line in f: 68 | labels.append(Label(float((line.split())[0]), i, 0)) 69 | i = i + 1 70 | f.close() 71 | l = i 72 | 73 | # determine where to output 74 | if 
subset_filename != "": 75 | file1 = open(subset_filename, 'w') 76 | else: 77 | file1 = stdout 78 | split = 0 79 | if rest_filename != "": 80 | split = 1 81 | file2 = open(rest_filename, 'w') 82 | 83 | # select the subset 84 | warning = 0 85 | if method == 0: # stratified 86 | labels.sort(key = lambda x: x.label) 87 | 88 | label_end = labels[l-1].label + 1 89 | labels.append(Label(label_end, l, 0)) 90 | 91 | begin = 0 92 | label = labels[begin].label 93 | for i in range(l+1): 94 | new_label = labels[i].label 95 | if new_label != label: 96 | nr_class = i - begin 97 | k = i*n//l - begin*n//l 98 | # at least one instance per class 99 | if k == 0: 100 | k = 1 101 | warning = warning + 1 102 | for j in range(nr_class): 103 | if randint(0, nr_class-j-1) < k: 104 | labels[begin+j].selected = 1 105 | k = k - 1 106 | begin = i 107 | label = new_label 108 | elif method == 1: # random 109 | k = n 110 | for i in range(l): 111 | if randint(0,l-i-1) < k: 112 | labels[i].selected = 1 113 | k = k - 1 114 | i = i + 1 115 | 116 | # output 117 | i = 0 118 | if method == 0: 119 | labels.sort(key = lambda x: int(x.index)) 120 | 121 | f = open(dataset_filename, 'r') 122 | for line in f: 123 | if labels[i].selected == 1: 124 | file1.write(line) 125 | else: 126 | if split == 1: 127 | file2.write(line) 128 | i = i + 1 129 | 130 | if warning > 0: 131 | stderr.write("""\ 132 | Warning: 133 | 1. You may have regression data. Please use -s 1. 134 | 2. Classification data unbalanced or too small. We select at least 1 per class. 135 | The subset thus contains {0} instances. 
136 | """.format(n+warning)) 137 | 138 | # cleanup 139 | f.close() 140 | 141 | file1.close() 142 | 143 | if split == 1: 144 | file2.close() 145 | 146 | main() 147 | -------------------------------------------------------------------------------- /king/MathCholesky.cpp: -------------------------------------------------------------------------------- 1 | #include "MathCholesky.h" 2 | #include "Error.h" 3 | 4 | #include 5 | 6 | void Cholesky::Decompose(Matrix & A) 7 | { 8 | L.Dimension(A.rows, A.rows); 9 | L.Zero(); 10 | FastDecompose(A); 11 | } 12 | 13 | void Cholesky::FastDecompose(Matrix & A) 14 | { 15 | if (A.rows != A.cols) 16 | error("Cholesky.Decompose: Matrix %s is not square", 17 | (const char *) A.label); 18 | 19 | L.Dimension(A.rows, A.rows); 20 | 21 | for (int i=0; idata[j]; 25 | for (int k = i - 1; k >= 0; k--) 26 | sum -= L.data[i]->data[k] * L.data[j]->data[k]; 27 | if (i == j) 28 | if (sum <= 0.0) 29 | error("Cholesky - matrix %s is not positive definite", 30 | (const char *) A.label); 31 | else 32 | L.data[i]->data[i] = sqrt(sum); 33 | else 34 | L.data[j]->data[i] = sum / L.data[i]->data[i]; 35 | } 36 | } 37 | 38 | bool Cholesky::TryDecompose(Matrix & A) 39 | { 40 | L.Dimension(A.rows, A.rows); 41 | L.Zero(); 42 | 43 | if (A.rows != A.cols) 44 | return false; 45 | 46 | L.Dimension(A.rows, A.rows); 47 | 48 | for (int i=0; i= 0; k--) 53 | sum -= L.data[i]->data[k] * L.data[j]->data[k]; 54 | if (i == j) 55 | if (sum <= 0.0) 56 | return false; 57 | else 58 | L.data[i]->data[i] = sqrt(sum); 59 | else 60 | L.data[j]->data[i] = sum / L.data[i]->data[i]; 61 | } 62 | 63 | return true; 64 | } 65 | 66 | void Cholesky::BackSubst0(Vector & b) 67 | { 68 | x.Dimension(L.rows); 69 | 70 | // Solve L*v = b (store v in x) 71 | for (int i = 0; i < L.rows; i++) 72 | { 73 | double sum = b.data[i]; 74 | for (int k = i-1; k>=0; k--) 75 | sum -= L.data[i]->data[k] * x.data[k]; 76 | x.data[i] = sum / L.data[i]->data[i]; 77 | } 78 | } 79 | 80 | void Cholesky::BackSubst(Vector & 
b) 81 | { 82 | x.Dimension(L.rows); 83 | 84 | // Solve L*v = b (store v in x) 85 | for (int i = 0; i < L.rows; i++) 86 | { 87 | double sum = b[i]; 88 | for (int k = i-1; k>=0; k--) 89 | sum -= L.data[i]->data[k] * x.data[k]; 90 | x.data[i] = sum / L.data[i]->data[i]; 91 | } 92 | 93 | // Solve transpose(L)*x = v 94 | // End result is ... A*x = L*t(L)*x = L*v = b 95 | for (int i=L.rows-1; i>=0; i--) 96 | { 97 | double sum = x[i]; 98 | for (int k = i+1; k < L.rows; k++) 99 | sum -= L.data[k]->data[i] * x.data[k]; 100 | x.data[i] = sum / L.data[i]->data[i]; 101 | } 102 | 103 | // Done! 104 | } 105 | 106 | void Cholesky::Invert() 107 | { 108 | inv.Dimension(L.rows, L.rows); 109 | 110 | inv.Identity(); 111 | 112 | for(int i = 0; i < L.rows; i++) 113 | { 114 | BackSubst(inv[i]); 115 | inv[i] = x; 116 | } 117 | } 118 | 119 | double Cholesky::lnDeterminantL() 120 | { 121 | double sum = 0; 122 | for (int i = 0; i < L.rows; i++) 123 | sum += log(L[i][i]); 124 | return sum; 125 | } 126 | 127 | double Cholesky::DeterminantL() 128 | { 129 | double product = 1; 130 | for (int i=0; ifounders; 28 | int & count = ped.families[f]->count; 29 | int * & path = ped.families[f]->path; 30 | 31 | // Define a score for each individual based on... 32 | // * No. of genotyped markers (primarily) 33 | // * Affection status (tie-breaker) 34 | // 35 | scores.Dimension(count); 36 | 37 | for (int i = founders; i < count; i++) 38 | scores[i] = ped[path[i]].ngeno * 2 + (ped.affectionCount == 0 ? 
0 : 39 | ped[path[i]].affections[0] == 2); 40 | 41 | // Optimize path so descendants with higher scores appear first 42 | for (int i = founders + 1; i < count; i++) 43 | { 44 | // Non-founders must always follow founders 45 | int new_pos = founders; 46 | 47 | // In addition they must follow their father and any of his MZ twins 48 | Person & father = *ped[path[i]].father; 49 | 50 | if (father.traverse >= new_pos) 51 | new_pos = father.traverse + 1; 52 | 53 | // If father is an MZ twin, can't move this individual above his co-twins 54 | if (father.zygosity & 1) 55 | for (int j = 0; j < father.sibCount; j++) 56 | if (father.sibs[j]->zygosity == father.zygosity && 57 | father.sibs[j]->traverse >= new_pos) 58 | new_pos = father.sibs[j]->traverse + 1; 59 | 60 | // In addition they must follow their mother and any of his MZ twins 61 | Person & mother = *ped[path[i]].mother; 62 | 63 | if (mother.traverse >= new_pos) 64 | new_pos = mother.traverse + 1; 65 | 66 | // If mother is an MZ twin, can't move this individual above her co-twins 67 | if (mother.zygosity & 1) 68 | for (int j = 0; j < mother.sibCount; j++) 69 | if (mother.zygosity == mother.sibs[j]->zygosity && 70 | mother.sibs[j]->traverse >= new_pos) 71 | new_pos = mother.sibs[j]->traverse + 1; 72 | 73 | // Subject to these constraints, place individual above any others 74 | // with lower informativeness scores 75 | while (scores[new_pos] > scores[i] && new_pos < i) 76 | new_pos++; 77 | 78 | if (new_pos != i) 79 | { 80 | int person_to_move = path[i]; 81 | int saved_score = scores[i]; 82 | 83 | for (int move = i; move > new_pos; move--) 84 | { 85 | scores[move] = scores[move-1]; 86 | path[move] = path[move-1]; 87 | ped[path[move]].traverse++; 88 | } 89 | 90 | ped[person_to_move].traverse = new_pos; 91 | path[new_pos] = person_to_move; 92 | scores[new_pos] = saved_score; 93 | } 94 | } 95 | } 96 | } 97 | 98 | 99 | -------------------------------------------------------------------------------- /king/BasicHash.cpp: 
-------------------------------------------------------------------------------- 1 | #include "BasicHash.h" 2 | #include "Error.h" 3 | 4 | #include 5 | 6 | BasicHash::BasicHash(int startsize) 7 | { 8 | count = 0; 9 | size = startsize; 10 | mask = startsize - 1; 11 | 12 | // In this implementation, the size of hash tables must be a power of two 13 | if (startsize & mask) 14 | error("BasicHash: Hash table size must be a power of two.\n"); 15 | 16 | objects = new void * [size]; 17 | keys = new unsigned int [size]; 18 | 19 | for (unsigned int i = 0; i < size; i++) 20 | { objects[i] = NULL; } 21 | }; 22 | 23 | BasicHash::~BasicHash() 24 | { 25 | delete [] objects; 26 | delete [] keys; 27 | } 28 | 29 | void BasicHash::Clear() 30 | { 31 | // printf("Clearing...\n"); 32 | 33 | count = 0; 34 | 35 | if (size > 16) 36 | SetSize(16); 37 | 38 | for (unsigned int i = 0; i < size; i++) 39 | objects[i] = NULL; 40 | } 41 | 42 | void BasicHash::SetSize(int newsize) 43 | { 44 | int newmask = newsize - 1; 45 | 46 | void ** newobjects = new void * [newsize]; 47 | unsigned int * newkeys = new unsigned int [newsize]; 48 | 49 | for (int i = 0; i < newsize; i++) 50 | { newobjects[i] = NULL; } 51 | 52 | if (count) 53 | for (unsigned int i = 0; i < size; i++) 54 | if (objects[i] != NULL) 55 | { 56 | unsigned int key = keys[i]; 57 | unsigned int h = key & newmask; 58 | 59 | while ( newobjects[h] != NULL && newkeys[h] != h) 60 | h = (h + 1) & newmask; 61 | 62 | newkeys[h] = key; 63 | newobjects[h] = objects[i]; 64 | } 65 | 66 | delete [] objects; 67 | delete [] keys; 68 | 69 | objects = newobjects; 70 | keys = newkeys; 71 | size = newsize; 72 | mask = newmask; 73 | } 74 | 75 | int BasicHash::Add(int key, void * object) 76 | { 77 | if (count * 2 > size) 78 | Grow(); 79 | 80 | unsigned int h = Iterate(key); 81 | 82 | while ((objects[h] != NULL) && (objects[h] != object)) 83 | h = ReIterate(key, h); 84 | 85 | if (objects[h] == NULL) 86 | { 87 | // printf("At position %d, inserted %x\n", h, key); 
88 | keys[h] = key; 89 | count++; 90 | } 91 | 92 | objects[h] = object; 93 | 94 | return h; 95 | } 96 | 97 | int BasicHash::Find(int key) 98 | { 99 | int h = Iterate(key); 100 | 101 | return objects[h] == NULL ? -1 : h; 102 | } 103 | 104 | int BasicHash::Rehash(int key, int h) 105 | { 106 | h = ReIterate(key, h); 107 | 108 | return objects[h] == NULL ? -1 : h; 109 | } 110 | 111 | void BasicHash::Delete(unsigned int index) 112 | { 113 | if (index >= size || objects[index] == NULL) 114 | return; 115 | 116 | objects[index] = NULL; 117 | count--; 118 | 119 | if (count * 8 < size && size > 32) 120 | Shrink(); 121 | else 122 | { 123 | // rehash the next entries until we find empty slot 124 | index = (index + 1) & mask; 125 | 126 | while (objects[index] != NULL) 127 | { 128 | if ((keys[index] & mask) != index) 129 | { 130 | unsigned int h = Iterate(keys[index]); 131 | 132 | while ((objects[h] != NULL) && (objects[h] != objects[index])) 133 | h = ReIterate(keys[index], h); 134 | 135 | if (h != (unsigned int) index) 136 | { 137 | keys[h] = keys[index]; 138 | objects[h] = objects[index]; 139 | objects[index] = NULL; 140 | } 141 | } 142 | 143 | index = (index + 1) & mask; 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /king/StringArray.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // libsrc/StringArray.h 3 | // (c) 2000-2007 Goncalo Abecasis 4 | // 5 | // This file is distributed as part of the MERLIN source code package 6 | // and may not be redistributed in any form, without prior written 7 | // permission from the author. Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 
12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #ifndef __STRING_ARRAY_H__ 19 | #define __STRING_ARRAY_H__ 20 | 21 | #include "StringBasics.h" 22 | 23 | class StringArray 24 | { 25 | protected: 26 | String ** strings; 27 | int size, count; 28 | 29 | public: 30 | static int alloc; 31 | 32 | StringArray(int startsize = 0); 33 | StringArray(StringArray & original); 34 | virtual ~StringArray(); 35 | 36 | // Each line in a file is parsed into a separate array element 37 | // 38 | 39 | void Read(FILE * f); 40 | void Write(FILE * f); 41 | void WriteLine(FILE * f); 42 | void Read(const char * filename); 43 | void Write(const char * filename); 44 | void WriteLine(const char * filename); 45 | 46 | #ifdef __ZLIB_AVAILABLE__ 47 | void Read(IFILE & f); 48 | #endif 49 | 50 | // Write all strings to the screen 51 | void Print(); 52 | void PrintLine(); 53 | 54 | void Grow(int newsize); 55 | void Clear(); 56 | 57 | int Length() const { return count; } 58 | int Dimension(int newcount); 59 | int CharLength(); 60 | 61 | String & operator [] (int i) { return *(strings[i]); } 62 | const String & operator [] (int i) const { return *(strings[i]); } 63 | 64 | // These functions divide a string into tokens and append these to the 65 | // array. 
Return value is the new array length 66 | // 67 | 68 | int AddColumns(const String & s, char ch = '\t'); 69 | int AddTokens(const String & s, char ch); 70 | int AddTokens(const String & s, const String & separators = " \t\r\n"); 71 | 72 | int ReplaceColumns(const String & s, char ch = '\t') 73 | { Clear(); return AddColumns(s, ch); } 74 | int ReplaceTokens(const String & s, const String & separators = " \t\r\n") 75 | { Clear(); return AddTokens(s, separators); } 76 | 77 | // These functions add, insert or remove a single array element 78 | // 79 | 80 | int Add(const String & s); 81 | void InsertAt(int position, const String & s); 82 | void Delete(int position); 83 | 84 | // These functions manipulate a string as a stack 85 | // 86 | 87 | String & Last() const; 88 | int Push(const String & s) { return Add(s); } 89 | String Pop(); 90 | 91 | // Linear search (N/2 comparisons on average) for a single element 92 | // If searching is required, StringMaps are a better option 93 | // 94 | 95 | int Find(const String & s) const; 96 | int FastFind(const String & s) const; 97 | int SlowFind(const String & s) const; 98 | 99 | // Alphetically orders strings 100 | // 101 | void Sort(); 102 | 103 | // Trims strings to remove whitespace 104 | void Trim(); 105 | 106 | StringArray & operator = (const StringArray & rhs); 107 | 108 | bool operator == (const StringArray & rhs); 109 | bool operator != (const StringArray & rhs) 110 | { return !(*this == rhs); } 111 | 112 | private: 113 | static int ComparisonForSort(const void * a, const void * b); 114 | }; 115 | 116 | #endif 117 | 118 | 119 | -------------------------------------------------------------------------------- /king/InputFile.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // libsrc/InputFile.h 3 | // (c) 2000-2007 Goncalo Abecasis 4 | // 5 | // This file is distributed as part of the MERLIN source code package 6 
| // and may not be redistributed in any form, without prior written 7 | // permission from the author. Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #ifndef __INPUTFILE_H__ 19 | #define __INPUTFILE_H__ 20 | 21 | #ifdef __gnu_linux__ 22 | #ifndef __ZLIB_AVAILABLE__ 23 | #define __ZLIB_AVAILABLE__ 24 | #endif 25 | #endif 26 | 27 | #ifdef __ZLIB_AVAILABLE__ 28 | 29 | #include 30 | #include 31 | 32 | class IFILE 33 | { 34 | public: 35 | bool gzMode; 36 | union 37 | { 38 | gzFile gzHandle; 39 | FILE * handle; 40 | }; 41 | 42 | IFILE() 43 | { 44 | gzMode = false; 45 | handle = NULL; 46 | } 47 | 48 | IFILE(const char * filename, const char * mode); 49 | 50 | operator void * () 51 | { return gzMode ? (void *) gzHandle : (void *) handle; } 52 | 53 | IFILE operator = (const IFILE & rhs) 54 | { 55 | if ((gzMode = rhs.gzMode) == true) 56 | gzHandle = rhs.gzHandle; 57 | else 58 | handle = rhs.handle; 59 | 60 | return *this; 61 | } 62 | 63 | IFILE operator = (FILE * rhs) 64 | { 65 | gzMode = false; 66 | handle = rhs; 67 | return *this; 68 | } 69 | 70 | IFILE operator = (gzFile & rhs) 71 | { 72 | gzMode = true; 73 | gzHandle = rhs; 74 | return *this; 75 | } 76 | 77 | bool operator == (void * rhs) 78 | { 79 | if (rhs != NULL) 80 | return false; 81 | return gzMode ? gzHandle == rhs : handle == rhs; 82 | } 83 | }; 84 | 85 | inline IFILE ifopen(const char * filename, const char * mode) 86 | { IFILE file(filename, mode); return file; } 87 | 88 | inline int ifclose(IFILE & file) 89 | { return file.gzMode ? gzclose(file.gzHandle) : fclose(file.handle); } 90 | 91 | inline int ifgetc(IFILE & file) 92 | { return file.gzMode ? 
gzgetc(file.gzHandle) : fgetc(file.handle); } 93 | 94 | inline void ifrewind(IFILE & file) 95 | { if (file.gzMode) gzrewind(file.gzHandle); else rewind(file.handle); } 96 | 97 | inline int ifeof(IFILE & file) 98 | { return file.gzMode ? gzeof(file.gzHandle) : feof(file.handle); } 99 | 100 | #else 101 | 102 | #include 103 | 104 | class IFILE 105 | { 106 | public: 107 | FILE * handle; 108 | 109 | IFILE() 110 | { handle = NULL; } 111 | IFILE(const char * filename, const char * mode) 112 | { handle = fopen(filename, mode); } 113 | 114 | operator FILE *() 115 | { return handle; } 116 | 117 | IFILE & operator = (FILE * rhs) 118 | { handle = rhs; return *this; } 119 | 120 | IFILE & operator = (const IFILE & rhs) 121 | { handle = rhs.handle; return * this; } 122 | 123 | bool operator == (void * rhs) 124 | { 125 | if (rhs != NULL) 126 | return false; 127 | return handle == rhs; 128 | } 129 | }; 130 | 131 | inline IFILE ifopen(const char * filename, const char * mode) 132 | { IFILE file(filename, mode); return file; } 133 | 134 | inline int ifclose(IFILE & file) 135 | { return fclose(file.handle); } 136 | 137 | inline int ifgetc(IFILE & file) 138 | { return fgetc(file.handle); } 139 | 140 | inline void ifrewind(IFILE & file) 141 | { rewind(file.handle); } 142 | 143 | inline int ifeof(IFILE & file) 144 | { return feof(file.handle); } 145 | 146 | #endif 147 | 148 | #endif 149 | 150 | 151 | -------------------------------------------------------------------------------- /king/StringMap.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // libsrc/StringMap.h 3 | // (c) 2000-2007 Goncalo Abecasis 4 | // 5 | // This file is distributed as part of the MERLIN source code package 6 | // and may not be redistributed in any form, without prior written 7 | // permission from the author. 
Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #ifndef __STRINGMAP_H__ 19 | #define __STRINGMAP_H__ 20 | 21 | #include "StringBasics.h" 22 | 23 | class StringMap 24 | { 25 | protected: 26 | ::String ** strings; 27 | void ** objects; 28 | int count, size; 29 | 30 | public: 31 | static int alloc; 32 | 33 | StringMap(int startsize = 0); 34 | virtual ~StringMap(); 35 | 36 | void Grow(int newsize); 37 | void Clear(); 38 | int Length() const { return count; } 39 | 40 | void * Object(int i) const { return objects[i]; } 41 | void * Object(const ::String & key) const 42 | { 43 | int index = Find(key); 44 | return (index >= 0) ? objects[index] : NULL; 45 | } 46 | void * Object(const ::String & key, void * (*create_object)()) 47 | { return objects[Find(key, create_object)]; } 48 | 49 | void SetObject(int i, void * object) 50 | { objects[i] = object; } 51 | void SetObject(const ::String & key, void * object) 52 | { Add(key, object); } 53 | 54 | int Add(const ::String & s, void * object = NULL); 55 | int Find(const ::String & s, void * (*create_object)() = NULL); 56 | int Find(const ::String & s) const; 57 | int FindStem(const ::String & stem) const; 58 | 59 | StringMap & operator = (const StringMap & rhs); 60 | 61 | const ::String & operator [] (int i) const { return *(strings[i]); } 62 | ::String & operator [] (int i) { return *(strings[i]); } 63 | ::String & String(int i) { return *(strings[i]); } 64 | 65 | static void * CreateMap(); 66 | 67 | void Delete(int index); 68 | }; 69 | 70 | class StringIntMap 71 | { 72 | protected: 73 | ::String ** strings; 74 | int * integers; 75 | int count, size; 76 | 77 | public: 78 | static int 
alloc; 79 | 80 | StringIntMap(int startsize = 0); 81 | virtual ~StringIntMap(); 82 | 83 | void Grow(int newsize); 84 | void Clear(); 85 | int Length() const { return count; } 86 | 87 | int Integer(int i) const { return integers[i]; } 88 | int Integer(const ::String & key) const 89 | { 90 | int index = Find(key); 91 | return (index >= 0) ? (int) integers[index] : -1; 92 | } 93 | 94 | void SetInteger(int i, int value) 95 | { integers[i] = value; } 96 | void SetInteger(const ::String & key, int value) 97 | { Add(key, value); } 98 | 99 | int Add(const ::String & s, int i); 100 | int Find(const ::String & s, int defaultValue); 101 | int Find(const ::String & s) const; 102 | int FindStem(const ::String & stem) const; 103 | 104 | StringIntMap & operator = (const StringIntMap & rhs); 105 | 106 | const ::String & operator [] (int i) const { return *(strings[i]); } 107 | ::String & operator [] (int i) { return *(strings[i]); } 108 | ::String & String(int i) { return *(strings[i]); } 109 | 110 | static void * CreateMap(); 111 | 112 | int IncrementCount(const ::String & key); 113 | int DecrementCount(const ::String & key); 114 | int GetCount(const ::String & key) const; 115 | int GetCount(int index) const { return integers[index]; } 116 | 117 | void Delete(int index); 118 | }; 119 | 120 | #endif 121 | 122 | 123 | -------------------------------------------------------------------------------- /king/Hash.cpp: -------------------------------------------------------------------------------- 1 | #include "Hash.h" 2 | 3 | #include 4 | 5 | // ******************************************************** 6 | // 7 | // This code is based on the original by Robert Jenkins. 
// Byte-string hash functions following the scheme described at
//
//    http://burtleburtle.net/bob/hash/doobs.html
//
// ********************************************************

// Reversibly mixes three unsigned 32-bit-style accumulators in place.
#define MIX_INTEGERS(a,b,c) \
   { \
   a -= b; a -= c; a ^= (c>>13); \
   b -= c; b -= a; b ^= (a<<8);  \
   c -= a; c -= b; c ^= (b>>13); \
   a -= b; a -= c; a ^= (c>>12); \
   b -= c; b -= a; b ^= (a<<16); \
   c -= a; c -= b; c ^= (b>>5);  \
   a -= b; a -= c; a ^= (c>>3);  \
   b -= c; b -= a; b ^= (a<<10); \
   c -= a; c -= b; c ^= (b>>15); \
   }

#define ui (unsigned int)

namespace
{

// Byte transform for the case-sensitive hash: the byte is used as is.
struct PlainByte
   {
   unsigned int operator () (unsigned char ch) const
      { return ch; }
   };

// Byte transform for the case-insensitive hash: the byte is upper-cased.
struct UpperByte
   {
   unsigned int operator () (unsigned char ch) const
      { return (unsigned int) toupper(ch); }
   };

// Packs four consecutive (transformed) bytes into one word, first byte
// in the least significant position.
template <class ByteMap>
inline unsigned int PackWord(const unsigned char * p, ByteMap convert)
   {
   return convert(p[0])
        + (convert(p[1]) << 8)
        + (convert(p[2]) << 16)
        + (convert(p[3]) << 24);
   }

// Shared implementation of hash() and hash_no_case(); the two entry points
// differ only in the per-byte transform applied before accumulation.
template <class ByteMap>
unsigned int HashBytes(const unsigned char * key, unsigned int length,
                       unsigned int initval, ByteMap convert)
   {
   unsigned int a = 0x9e3779b9;
   unsigned int b = 0x9e3779b9;
   unsigned int c = initval;
   unsigned int remaining = length;

   // Consume the key twelve bytes (three words) at a time
   for ( ; remaining >= 12; key += 12, remaining -= 12)
      {
      a += PackWord(key,     convert);
      b += PackWord(key + 4, convert);
      c += PackWord(key + 8, convert);
      MIX_INTEGERS(a,b,c);
      }

   // Fold in the total length, then the (at most eleven) trailing bytes.
   // The lowest byte of c is reserved for the length, so bytes 8..10 enter
   // c one position higher than bytes 0..3 enter a and 4..7 enter b.
   c += length;
   for (unsigned int i = 0; i < remaining; i++)
      {
      const unsigned int value = convert(key[i]);

      if (i < 4)
         a += value << (8 * i);
      else if (i < 8)
         b += value << (8 * (i - 4));
      else
         c += value << (8 * (i - 7));
      }
   MIX_INTEGERS(a,b,c);

   return c;
   }

}

// Hashes `length` bytes starting at `key`; `initval` seeds the result so
// that hashes can be chained or varied. Returns the mixed accumulator.
unsigned int hash ( const unsigned char * key, unsigned int length, unsigned int initval)
   {
   return HashBytes(key, length, initval, PlainByte());
   }

// Same as hash(), except that every byte is passed through toupper() first,
// so keys differing only in letter case hash to the same value.
unsigned int hash_no_case ( const unsigned char * key, unsigned int length, unsigned int initval)
   {
   return HashBytes(key, length, initval, UpperByte());
   }
public: 16 | static int alloc; 17 | 18 | IntArray(int start_size = 0); 19 | IntArray(const IntArray & source); 20 | ~IntArray(); 21 | 22 | IntArray & operator = (const IntArray & rhs); 23 | 24 | int & operator [] (int index) { return items[index]; } 25 | int operator [] (int index) const { return items[index]; } 26 | 27 | int & operator [] (double fraction) 28 | { return items[(int) (count * fraction)]; } 29 | int operator [] (double fraction) const 30 | { return items[(int) (count * fraction)]; } 31 | 32 | int Append(int value); 33 | int Append(const IntArray & rhs); 34 | 35 | void Push(int value) { Append(value); } 36 | int Pop() { return items[--count]; } 37 | int Peek() const { return items[count - 1]; } 38 | int &Last() const { return items[count - 1]; } 39 | 40 | void PushIfNew(int value); // used for maintaining list without duplicates 41 | 42 | int Delete(int index); 43 | void InsertAt(int index, int value); 44 | 45 | int Find(int value) const; 46 | int FastFind(int value) const { return BinarySearch(value); } 47 | int BinarySearch(int value) const; 48 | void Sort(); 49 | 50 | void Zero(); 51 | void Set(int value); 52 | void SetSequence(int start = 0, int increment = 1); 53 | 54 | int Length() const { return count; } 55 | void Dimension(int new_count) { Grow(new_count); count = new_count; } 56 | void Clear() { count = 0; } 57 | 58 | int Sum() const { return Sum(0, count - 1); } 59 | int Sum(int start) const { return Sum(start, count - 1); } 60 | int Sum(int start, int end) const; 61 | 62 | int Max() const { return Max(0, count - 1); } 63 | int Max(int start) const { return Max(start, count - 1); } 64 | int Max(int start, int end) const; 65 | 66 | int Min() const { return Min(0, count - 1); } 67 | int Min(int start) const { return Min(start, count - 1); } 68 | int Min(int start, int end) const; 69 | 70 | int Count() const {return count; } 71 | int CountIfGreater(int treshold) const; 72 | int CountIfGreaterOrEqual(int treshold) const; 73 | 74 | void Swap(int 
i, int j) 75 | { int tmp = items[i]; items[i] = items[j]; items[j] = tmp; } 76 | 77 | void Reverse(); 78 | 79 | operator int * () { return items; } 80 | 81 | void Add(int term); 82 | void Subtract(int term) { Add(-term); } 83 | void Multiply(int factor); 84 | void Divide(int denominator); 85 | 86 | IntArray & operator += (int rhs) 87 | { Add(rhs); return *this; } 88 | 89 | IntArray & operator *= (int rhs) 90 | { Multiply(rhs); return *this; } 91 | 92 | IntArray & operator -= (int rhs) 93 | { Add(-rhs); return *this; } 94 | 95 | IntArray & operator /= (int rhs) 96 | { Divide(rhs); return *this; } 97 | 98 | int InnerProduct(IntArray & v); 99 | 100 | bool operator == (const IntArray & rhs) const; 101 | bool operator != (const IntArray & rhs) const; 102 | 103 | bool isAscending(); 104 | bool isDescending(); 105 | 106 | void Stack(const IntArray & rhs); 107 | 108 | void Swap(IntArray & rhs); 109 | 110 | void Print() { Print(stdout); } 111 | void Print(const char * label) { Print(stdout, label); } 112 | void Print(FILE * output); 113 | void Print(FILE * output, const char * label); 114 | 115 | int Product(); 116 | double DoubleProduct(); 117 | 118 | int Hash(int initval = 0); 119 | }; 120 | 121 | #endif 122 | 123 | 124 | -------------------------------------------------------------------------------- /king/Random.h: -------------------------------------------------------------------------------- 1 | 2 | ////////////////////////////////////////////////////////////////////////////// 3 | // This file includes code derived from the original Mersenne Twister Code 4 | // by Makoto Matsumoto and Takuji Nishimura 5 | // and is subject to their original copyright notice copied below: 6 | ////////////////////////////////////////////////////////////////////////////// 7 | 8 | ////////////////////////////////////////////////////////////////////////////// 9 | // COPYRIGHT NOTICE FOR MERSENNE TWISTER CODE 10 | // 11 | // Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, 
12 | // All rights reserved. 13 | // 14 | // Redistribution and use in source and binary forms, with or without 15 | // modification, are permitted provided that the following conditions 16 | // are met: 17 | // 18 | // 1. Redistributions of source code must retain the above copyright 19 | // notice, this list of conditions and the following disclaimer. 20 | // 21 | // 2. Redistributions in binary form must reproduce the above copyright 22 | // notice, this list of conditions and the following disclaimer in the 23 | // documentation and/or other materials provided with the distribution. 24 | // 25 | // 3. The names of its contributors may not be used to endorse or promote 26 | // products derived from this software without specific prior written 27 | // permission. 28 | // 29 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 33 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 34 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 35 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 36 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 37 | // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 38 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 39 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// Quick and dirty linear congruential generator.
// RAND(seed) advances `seed` in place and yields its low 32 bits;
// note that `seed` is evaluated more than once.
#define RANDMUL 1664525L
#define RANDADD 1013904223L

#define RAND(seed) ((seed = seed * RANDMUL + RANDADD) & 0xFFFFFFFF)

// Random number generator.
//
// Implements the Mersenne Twister as default random number generator.
// Compilation flag __NO_MERSENNE sets default generator to
// a minimal Park-Miller with Bays-Durham shuffle and added safe guards.
class Random
   {
   protected:
      // State for the "minimal random values" generator
      long   seed;
      long   last;
      long * shuffler;

      // Saved state for generating normal deviates
      int    normSaved;
      double normStore;

      double mersenneMult;

      // Mersenne state vector
      unsigned long * mt;

      // Signals that the Mersenne state vector is not initialized
      int mti;

   public:
      Random(long s = 0x7654321);
      ~Random();

      // Next bit in a series of 0s and 1s
      int Binary();

      // Next value in the series, between 0 and 1
      double Next();

      // Next integer in the series
      unsigned long NextInt();

      // Random deviate from N(0,1)
      double Normal();

      void Reset(long s);
      void InitMersenne(unsigned long s);

      // Converting the generator to double draws the next value
      operator double()
         {
         return Next();
         }

      // Random number between arbitrary bounds: one draw from Next(),
      // rescaled to start at lo and span (hi - lo)
      double Uniform(double lo = 0.0, double hi = 1.0)
         {
         const double width = hi - lo;
         return lo + width * Next();
         }

      void Choose(int * array, int n, int k);
      void Choose(int * array, float * weights, int n, int k);
   };

extern Random globalRandom;
-------------------------------------------------------------------------------- 1 | NWD100953 crams/NWD100953.recab.cram 2 | NWD119836 crams/NWD119836.recab.cram 3 | NWD119844 crams/NWD119844.recab.cram 4 | NWD136397 crams/NWD136397.recab.cram 5 | NWD146103 crams/NWD146103.recab.cram 6 | NWD155824 crams/NWD155824.recab.cram 7 | NWD165827 crams/NWD165827.recab.cram 8 | NWD176325 crams/NWD176325.recab.cram 9 | NWD183321 crams/NWD183321.recab.cram 10 | NWD191048 crams/NWD191048.recab.cram 11 | NWD230091 crams/NWD230091.recab.cram 12 | NWD231092 crams/NWD231092.recab.cram 13 | NWD234815 crams/NWD234815.recab.cram 14 | NWD245311 crams/NWD245311.recab.cram 15 | NWD259170 crams/NWD259170.recab.cram 16 | NWD263474 crams/NWD263474.recab.cram 17 | NWD285363 crams/NWD285363.recab.cram 18 | NWD290849 crams/NWD290849.recab.cram 19 | NWD293295 crams/NWD293295.recab.cram 20 | NWD296991 crams/NWD296991.recab.cram 21 | NWD298195 crams/NWD298195.recab.cram 22 | NWD315195 crams/NWD315195.recab.cram 23 | NWD315403 crams/NWD315403.recab.cram 24 | NWD316026 crams/NWD316026.recab.cram 25 | NWD319341 crams/NWD319341.recab.cram 26 | NWD373853 crams/NWD373853.recab.cram 27 | NWD422016 crams/NWD422016.recab.cram 28 | NWD428511 crams/NWD428511.recab.cram 29 | NWD433038 crams/NWD433038.recab.cram 30 | NWD434806 crams/NWD434806.recab.cram 31 | NWD444824 crams/NWD444824.recab.cram 32 | NWD446684 crams/NWD446684.recab.cram 33 | NWD455342 crams/NWD455342.recab.cram 34 | NWD463423 crams/NWD463423.recab.cram 35 | NWD465900 crams/NWD465900.recab.cram 36 | NWD470340 crams/NWD470340.recab.cram 37 | NWD479955 crams/NWD479955.recab.cram 38 | NWD480514 crams/NWD480514.recab.cram 39 | NWD490850 crams/NWD490850.recab.cram 40 | NWD492101 crams/NWD492101.recab.cram 41 | NWD495157 crams/NWD495157.recab.cram 42 | NWD496530 crams/NWD496530.recab.cram 43 | NWD502718 crams/NWD502718.recab.cram 44 | NWD512755 crams/NWD512755.recab.cram 45 | NWD518110 crams/NWD518110.recab.cram 46 | NWD524030 
crams/NWD524030.recab.cram 47 | NWD535753 crams/NWD535753.recab.cram 48 | NWD548310 crams/NWD548310.recab.cram 49 | NWD578417 crams/NWD578417.recab.cram 50 | NWD578584 crams/NWD578584.recab.cram 51 | NWD580039 crams/NWD580039.recab.cram 52 | NWD585842 crams/NWD585842.recab.cram 53 | NWD611564 crams/NWD611564.recab.cram 54 | NWD614349 crams/NWD614349.recab.cram 55 | NWD626020 crams/NWD626020.recab.cram 56 | NWD626736 crams/NWD626736.recab.cram 57 | NWD635737 crams/NWD635737.recab.cram 58 | NWD651125 crams/NWD651125.recab.cram 59 | NWD651283 crams/NWD651283.recab.cram 60 | NWD651359 crams/NWD651359.recab.cram 61 | NWD652050 crams/NWD652050.recab.cram 62 | NWD672633 crams/NWD672633.recab.cram 63 | NWD677194 crams/NWD677194.recab.cram 64 | NWD684137 crams/NWD684137.recab.cram 65 | NWD697767 crams/NWD697767.recab.cram 66 | NWD704221 crams/NWD704221.recab.cram 67 | NWD714003 crams/NWD714003.recab.cram 68 | NWD716220 crams/NWD716220.recab.cram 69 | NWD725484 crams/NWD725484.recab.cram 70 | NWD742333 crams/NWD742333.recab.cram 71 | NWD746396 crams/NWD746396.recab.cram 72 | NWD754590 crams/NWD754590.recab.cram 73 | NWD759405 crams/NWD759405.recab.cram 74 | NWD760327 crams/NWD760327.recab.cram 75 | NWD761329 crams/NWD761329.recab.cram 76 | NWD762682 crams/NWD762682.recab.cram 77 | NWD763972 crams/NWD763972.recab.cram 78 | NWD768309 crams/NWD768309.recab.cram 79 | NWD768493 crams/NWD768493.recab.cram 80 | NWD769626 crams/NWD769626.recab.cram 81 | NWD778759 crams/NWD778759.recab.cram 82 | NWD790783 crams/NWD790783.recab.cram 83 | NWD791319 crams/NWD791319.recab.cram 84 | NWD805667 crams/NWD805667.recab.cram 85 | NWD812009 crams/NWD812009.recab.cram 86 | NWD821054 crams/NWD821054.recab.cram 87 | NWD831422 crams/NWD831422.recab.cram 88 | NWD832275 crams/NWD832275.recab.cram 89 | NWD841343 crams/NWD841343.recab.cram 90 | NWD842401 crams/NWD842401.recab.cram 91 | NWD855893 crams/NWD855893.recab.cram 92 | NWD866959 crams/NWD866959.recab.cram 93 | NWD875673 
crams/NWD875673.recab.cram 94 | NWD881320 crams/NWD881320.recab.cram 95 | NWD886731 crams/NWD886731.recab.cram 96 | NWD897509 crams/NWD897509.recab.cram 97 | NWD901849 crams/NWD901849.recab.cram 98 | NWD905240 crams/NWD905240.recab.cram 99 | NWD909656 crams/NWD909656.recab.cram 100 | NWD910953 crams/NWD910953.recab.cram 101 | NWD918554 crams/NWD918554.recab.cram 102 | NWD929194 crams/NWD929194.recab.cram 103 | NWD952432 crams/NWD952432.recab.cram 104 | NWD953198 crams/NWD953198.recab.cram 105 | NWD967078 crams/NWD967078.recab.cram 106 | NWD968809 crams/NWD968809.recab.cram 107 | NWD991001 crams/NWD991001.recab.cram 108 | -------------------------------------------------------------------------------- /king/MathLu.cpp: -------------------------------------------------------------------------------- 1 | #include "MathLu.h" 2 | #include "Error.h" 3 | 4 | #include 5 | 6 | LU::~LU() 7 | { } 8 | 9 | void LU::Decompose(Matrix & a) 10 | { 11 | Vector vv; // stores the implict scaling of each row 12 | 13 | if (a.rows != a.cols) 14 | error("LU.Decompose: Matrix %s is not square", (const char *) a.label); 15 | 16 | lu.Copy(a); 17 | vv.Dimension(lu.rows); 18 | d = 1.0; 19 | 20 | permutation.Dimension(lu.rows); 21 | 22 | // loop over rows to get implicit scaling information 23 | for (int i = 0; i < lu.rows; i++) 24 | { 25 | double big = 0.0, temp; 26 | for (int j = 0; j < lu.rows; j++) 27 | if ( (temp = fabs(lu[i][j])) > big) big = temp; 28 | if (big == 0.0) 29 | error ("LU.Decompose: Matrix %s is singular", (const char *) a.label); 30 | vv[i] = 1.0 / big; 31 | } 32 | 33 | // Loop over columns as per Crout's method 34 | for (int j=0; j < lu.rows; j++) 35 | { 36 | // Uij = aij - Sum(1 to i - 1)[Lik*Uik] 37 | for (int i=0; i < j; i++) 38 | { 39 | double sum = lu[i][j]; 40 | for (int k=0; k < i; k++) 41 | sum -= lu[i][k] * lu[k][j]; 42 | lu[i][j] = sum; 43 | } 44 | 45 | // find the pivot element 46 | double big = 0.0; 47 | int imax; 48 | 49 | // and compute Lij = 1/Ujj * { aij - 
Sum(1 to j - 1)[Lik*Uik] } 50 | for (int i = j; i < lu.rows; i++) 51 | { 52 | double sum = lu[i][j]; 53 | for (int k = 0; k < j; k++) 54 | sum -= lu[i][k] * lu[k][j]; 55 | lu[i][j] = sum; 56 | 57 | // check the figure of merit for this pivot 58 | double merit = vv[i] * fabs(sum); 59 | if (merit >= big) 60 | { 61 | big = merit; 62 | imax = i; 63 | } 64 | } 65 | 66 | // interchange rows if necessary 67 | if (j != imax) 68 | { 69 | lu.SwapRows(j, imax); 70 | d = -d; 71 | vv[imax] = vv[j]; 72 | } 73 | 74 | permutation[j] = imax; 75 | 76 | if (lu[j][j] == 0.0) 77 | error("LU.Decompose: Matrix %s has zero pivot",(const char *)a.label); 78 | 79 | // finally divide by pivot element 80 | if (j != lu.rows - 1) 81 | { 82 | double scale = 1.0 / lu[j][j]; 83 | for (int i = j + 1; i < lu.rows; i++) 84 | lu[i][j] *= scale; 85 | } 86 | } 87 | } 88 | 89 | void LU::BackSubst(Vector & b) 90 | { 91 | x.Copy(b); 92 | 93 | // take into account the possibility that b starts with 94 | // a number of leading zeros (ie. for matrix inversion) 95 | 96 | int nonZero = -1, unscramble; 97 | 98 | // forward substitution with unscrambling of the permutation... 
99 | for (int i = 0; i < lu.rows; i++) 100 | { 101 | unscramble = permutation[i]; 102 | double sum = x[unscramble]; 103 | x[unscramble] = x[i]; 104 | 105 | if (nonZero != -1) 106 | for (int j = nonZero; j <= i - 1; j++) 107 | sum -= lu[i][j] * x[j]; 108 | else 109 | if (sum) 110 | nonZero = i; 111 | x[i] = sum; 112 | } 113 | 114 | // Now do the backsubstitution 115 | for (int i = lu.rows - 1; i >= 0; i--) 116 | { 117 | double sum = x[i]; 118 | for (int j = i + 1; j < lu.rows; j++) 119 | sum -= lu[i][j] * x[j]; 120 | x[i] = sum / lu[i][i]; 121 | } 122 | } 123 | 124 | void LU::Invert() 125 | { 126 | inv.Dimension(lu.rows, lu.rows); 127 | 128 | inv.Identity(); 129 | 130 | for(int i = 0; i < lu.rows; i++) 131 | { 132 | BackSubst(inv[i]); 133 | inv[i] = x; 134 | } 135 | } 136 | 137 | double LU::Determinant() 138 | { 139 | double det = d; 140 | 141 | for (int i = 0; i < lu.rows; i++) 142 | det *= lu[i][i]; 143 | 144 | return det; 145 | } 146 | 147 | double LU::lnDeterminant() 148 | { 149 | bool minus_sign = d == -1; 150 | double lnDet = 0.0; 151 | 152 | for (int i = 0; i < lu.rows; i++) 153 | if (lu[i][i] > 0) 154 | lnDet += log(lu[i][i]); 155 | else 156 | { 157 | lnDet += log(-lu[i][i]), 158 | minus_sign == !minus_sign; 159 | } 160 | 161 | if (minus_sign) 162 | error("LU::lnDeterminant cannot log negative value\n"); 163 | 164 | return lnDet; 165 | } 166 | 167 | -------------------------------------------------------------------------------- /king/MathFloatVector.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATHFLOATVECTOR_H__ 2 | #define __MATHFLOATVECTOR_H__ 3 | 4 | #include "StringBasics.h" 5 | 6 | #include 7 | #include 8 | 9 | class Matrix; 10 | 11 | class FloatVector 12 | { 13 | public: 14 | int dim, size; 15 | float * data; 16 | String label; 17 | 18 | FloatVector() 19 | { Init(); } 20 | FloatVector(FloatVector & v) 21 | { Init(); Copy(v); } 22 | FloatVector(int d) 23 | { Init(); Dimension(d); } 24 | 
FloatVector(const char * text) 25 | { Init(); label = text; } 26 | FloatVector(const char * text, int d) 27 | { Init(); label = text; Dimension(d); } 28 | FloatVector(const char * text, FloatVector & v) 29 | { Init(); label = text; Copy(v); } 30 | 31 | ~FloatVector(); 32 | 33 | void Dimension(int d); 34 | int Length() const { return dim; } 35 | 36 | void SetLabel(const char * text) { label = text; } 37 | 38 | void Zero(); 39 | void Set(double k); 40 | void Set(FloatVector & v) { Copy(v); }; 41 | void SetMultiple(double k, FloatVector & v); 42 | 43 | void Negate(); 44 | void Add(double n); 45 | void Multiply(double k); 46 | 47 | double InnerProduct(FloatVector & v); 48 | void Copy(const FloatVector & v); 49 | void Add(FloatVector & v); 50 | void AddMultiple(double k, FloatVector & v); 51 | void Subtract(FloatVector & v); 52 | 53 | void Product(Matrix & m, FloatVector & v); 54 | 55 | float & operator [] (int n) 56 | { assert(n < dim); return data[n]; } 57 | float operator [] (int n) const 58 | { assert(n < dim); return data[n]; } 59 | 60 | float operator [] (double fraction) 61 | { return data[(int) (dim * fraction)]; } 62 | float & operator [] (double fraction) const 63 | { return data[(int) (dim * fraction)]; } 64 | 65 | FloatVector & operator = (const FloatVector & v); 66 | bool operator == (const FloatVector & v) const; 67 | bool operator != (const FloatVector & v) const { return !(*this == v); } 68 | 69 | void Swap(int i, int j) 70 | { double swap = data[i]; data[i] = data[j]; data[j] = swap; } 71 | void Swap(FloatVector & rhs); 72 | 73 | FloatVector & operator *= (double rhs) { Multiply(rhs); return *this; } 74 | FloatVector & operator += (double rhs) { Add(rhs); return *this; } 75 | FloatVector & operator -= (double rhs) { return *this += -rhs; } 76 | FloatVector & operator /= (double rhs) { return *this *= 1/rhs; } 77 | 78 | void DeleteDimension (int n); 79 | void Delete(int n) { DeleteDimension(n); } 80 | void Insert(int n, double value); 81 | 82 | // 
Calculates average and variance 83 | void AveVar(double & ave, double & var) const; 84 | double Average() const; 85 | double Var() const; 86 | 87 | // Common descriptive functions 88 | double Sum() const; 89 | double SumSquares() const; 90 | double Product() const; 91 | 92 | // Find extreme values 93 | double Min() const; 94 | double Max() const; 95 | 96 | // Return the number of elements in a subset 97 | int CountIfGreater(double treshold) const; 98 | int CountIfGreaterOrEqual(double treshold) const; 99 | 100 | // Append another vector to the end 101 | void Stack(const FloatVector & v); 102 | 103 | void Print(int maxDim = -1) { Print(stdout, maxDim); } 104 | void Print(FILE * output, int maxDim = -1); 105 | 106 | // Routines for creating and searching through sorted vectors 107 | void Sort(); 108 | void Reverse(); 109 | void Sort(FloatVector & freeRider); 110 | int BinarySearch(double element); 111 | 112 | // Remove consecutive duplicate elements from FloatVector 113 | void RemoveDuplicates(); 114 | 115 | // Query first and last elements 116 | // 117 | 118 | float & First() { return data[0]; } 119 | float & Last() { return data[dim - 1]; } 120 | 121 | // Routines for using a vector as a stack of doubles 122 | // 123 | 124 | void Clear() { dim = 0; } 125 | void Push(double value); 126 | double Pop() { return data[--dim]; } 127 | double Peek() const { return data[dim-1]; } 128 | 129 | // This routine adds items to a sorted list 130 | // 131 | 132 | void InsertInSortedList(int item); 133 | 134 | bool isAscending(); 135 | bool isDescending(); 136 | 137 | // Routines for dealing with vectors that include missing data 138 | // 139 | 140 | int SafeCount() const; 141 | double SafeMin() const; 142 | double SafeMax() const; 143 | 144 | private: 145 | static int CompareFloat(const float * a, const float * b); 146 | void Init(); 147 | }; 148 | 149 | #endif 150 | 151 | 152 | 153 | -------------------------------------------------------------------------------- 
/king/PedigreeAlleles.h: -------------------------------------------------------------------------------- 1 | ////////////////////////////////////////////////////////////////////// 2 | // libsrc/PedigreeAlleles.h 3 | // (c) 2000-2007 Goncalo Abecasis 4 | // 5 | // This file is distributed as part of the MERLIN source code package 6 | // and may not be redistributed in any form, without prior written 7 | // permission from the author. Permission is granted for you to 8 | // modify this file for your own personal use, but modified versions 9 | // must retain this copyright notice and must not be distributed. 10 | // 11 | // Permission is granted for you to use this file to compile MERLIN. 12 | // 13 | // All computer programs have bugs. Use this file at your own risk. 14 | // 15 | // Tuesday December 18, 2007 16 | // 17 | 18 | #ifndef __PEDALLELES_H__ 19 | #define __PEDALLELES_H__ 20 | 21 | #include "LongInt.h" 22 | 23 | class Alleles{ 24 | public: 25 | char geno; 26 | Alleles(){ geno = 0; } 27 | 28 | char operator [] (int i) 29 | { return (i == 1) ? (geno&15) : (geno>>4);} 30 | 31 | void AssignGenotype(int G1, int G2) 32 | {geno = char(G1 + (G2<<4));} 33 | 34 | void AssignGenotype(int G) 35 | {geno = char(G);} 36 | 37 | // is the genotype fully defined? 38 | bool isKnown(){ return geno != 0; } 39 | bool isHeterozygous() 40 | { return isKnown() && ((geno&15) != (geno>>4)); } 41 | bool isHomozygous() 42 | { return isKnown() && ((geno&15) == (geno>>4)); } 43 | bool hasAllele(int a) 44 | { return ((geno&15) == a) || ((geno>>4) == a); } 45 | 46 | // in a bi-allelic system (a, NOT a) 47 | bool isHeterozygousFor(int a){ return isHeterozygous() && hasAllele(a); } 48 | bool isHomozygousFor(int a){ return !(isHeterozygousFor(a)); } 49 | 50 | // how may alleles a in this genotype? 51 | int countAlleles(int a) 52 | { return (((geno&15) == a) ? 1 : 0) + (((geno>>4) == a) ? 
1 : 0); } 53 | 54 | // what is the other allele, assuming genotype is (a, X) 55 | int otherAllele(int a) 56 | { return (((geno&15) == a) ? (geno>>4) : (geno&15)); } 57 | 58 | // are two unordered genotypes identical? 59 | int identicalTo(Alleles & al) 60 | { return (al.geno == geno) || 61 | ((al[2]>>4)+(al[1]<<4) == geno);} 62 | 63 | // how many alleles are identical by state 64 | int countIBS(Alleles & al) 65 | { return ((geno&15) == al[1]) ? 66 | (((geno>>4) == al[2]) ? 2 : 1) : 67 | ( ((geno&15) == al[2]) ? 68 | (((geno>>4) == al[1]) ? 2 : 1) : 69 | ((((geno>>4) == al[1]) || ((geno>>4) == al[2])) ? 1 : 0)); 70 | } 71 | 72 | int operator == (Alleles & rhs) { return identicalTo(rhs); } 73 | int operator != (Alleles & rhs) { return !identicalTo(rhs); } 74 | 75 | char Hi() 76 | { return (geno&15) > (geno>>4) ? (geno&15) : (geno>>4); } 77 | char Lo() 78 | { return (geno&15) > (geno>>4) ? (geno>>4) : (geno&15); } 79 | 80 | int SequenceCoded() 81 | { return isKnown() ? Hi() * (Hi() - 1) / 2 + Lo() : 0; } 82 | 83 | longint BinaryCoded() 84 | { 85 | if (isKnown()) 86 | { 87 | longint allele1(1); 88 | longint allele2(1); 89 | 90 | allele1 <<= (geno&15) - 1; 91 | allele2 <<= (geno>>4) - 1; 92 | 93 | return allele1 | allele2; 94 | } 95 | else 96 | return NOTZERO; 97 | } 98 | 99 | void Intersect(Alleles & gen) 100 | { 101 | char a1 = Lo(), a2 = Hi(); 102 | char b1 = gen.Lo(), b2 = gen.Hi(); 103 | 104 | if (a1 == b1 && a2 == b2) 105 | return; 106 | if (a1 == b1 || a1 == b2) 107 | geno = (a1<<4) + a1; 108 | else if (a2 == b1 || a2 == b2) 109 | geno = (a2<<4) + a2; 110 | else 111 | geno = 0; 112 | } 113 | 114 | void Intersect(char allele) 115 | { 116 | if ((geno&15) != allele && (geno>>4) != allele) 117 | geno = 0; 118 | else 119 | geno = (allele << 4) + allele; 120 | } 121 | 122 | bool AddAllele(char allele) 123 | { 124 | if ((geno&15) == allele || (geno>>4) == allele) 125 | return true; 126 | 127 | if ((geno&15) != 0 && (geno>>4) != 0) 128 | return false; 129 | 130 | if 
((geno&15) == 0) geno |= allele; else geno |= (allele>>4); 131 | return true; 132 | } 133 | 134 | void Wipe() {geno=0;} 135 | }; 136 | 137 | #endif 138 | 139 | 140 | -------------------------------------------------------------------------------- /scripts/e04-filter-vars.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | 5 | my $chr = $ARGV[2]; 6 | 7 | my $milk = $ARGV[1]; #"fixed0/milk.filt/milk.chr$chr.merged.sites.vcf.gz"; 8 | my $svm = $ARGV[0]; #"analysis/filt/ld/frz9/svm.hm3ld.fmis10/frz9.milk_svm.hm3ld_fmis10.chr$chr.sites.vcf.gz"; 9 | my $outprefix = $ARGV[3]; #"fixed0/svm.ld.fmis/frz9.milk_svm.release.chr$chr.sites"; 10 | my $vcfsummary2 = "$ENV{'EXE_PREFIX'}/apigenome/bin/vcf-summary-v2"; 11 | my $ref = "resources/ref/hs38DH.fa"; 12 | my $dbsnp = "resources/ref/dbsnp_142.b38.vcf.gz"; 13 | my @posVcfs = qw(resources/ref/hapmap_3.3.b38.sites.vcf.gz resources/ref/1000G_omni2.5.b38.sites.PASS.vcf.gz); 14 | 15 | open(SVM,"zcat $svm|") || die "Cannot open file\n"; 16 | open(MILK,"zcat $milk | grep -v ^# |") || die "Cannot open file\n"; 17 | open(OUT1," | $ENV{'EXE_PREFIX'}/htslib/bgzip -c > $outprefix.vcf.gz") || die "Cannot open file\n"; 18 | open(OUT2, "| $vcfsummary2 --ref $ref --db $dbsnp --FNRvcf $posVcfs[0] --chr $chr --tabix $ENV{'EXE_PREFIX'}/htslib/tabix --bgzip $ENV{'EXE_PREFIX'}/htslib/bgzip > $outprefix.summary_v2") || die "Cannot open file\n"; 19 | while() { 20 | if ( /^#/ ) { 21 | next if ( /^##FILTER=\n"; 26 | print OUT1 "##INFO=\n"; 27 | print OUT1 "##INFO=\n"; 28 | print OUT1 "##INFO=\n"; 29 | print OUT1 "##FILTER=\n"; 30 | print OUT1 "##FILTER=\n"; 31 | print OUT1 "##FILTER=\n"; 32 | } 33 | } 34 | else { 35 | my @F = split(/[\t\r\n]/); 36 | my @M = split(/[\t\r\n]/,); 37 | die unless ( $F[1] == $M[1] ); 38 | 39 | #print STDERR "Processing $F[0]:$F[1]\n" if ( $. 
% 1000000 == 0 ); 40 | 41 | my ($dup,$trio) = ($1,$2) if ( $M[7] =~ /;DUP_CONC_THRES=([^;]+);.*;TRIO_CONC_THRES=([^;]+)/ ); 42 | my $fmis = $1 if ( $F[7] =~ /;FMIS10=([^;]+)/ ); 43 | my @dups = split(/,/,$dup); 44 | my @trios = split(/,/,$trio); 45 | 46 | my $duphet = $dups[4]; 47 | my $dupdisc = $dups[1]+$dups[2]+$dups[3]+$dups[5]+$dups[6]+$dups[7]; 48 | my $dupnhom = $duphet + $dupdisc; 49 | my $dupnref = $dupnhom + $dups[8]; 50 | my $dupall = $dupnref + $dups[0]; 51 | my $dupFilt = ( ($dupdisc > 1 ) && ( $dupdisc > 0.02*$dupnhom) ) ? 1 : 0; 52 | 53 | ## 0,1,2 : 0,0,0 C 0,0,1 D 0,0,2 D 54 | ## 3,4,5 : 0,1,0 A 0,1,1 A 0,1,2 D 55 | ## 6,7,8 : 0,2,0 D 0,2,1 C 0,2,2 D 56 | ## 9,10,11 : 1,0,0 A 1,0,1 A 1,0,2 D 57 | ## 12,13,14 : 1,1,0 A 1,1,1 A 1,1,2 A 58 | ## 15,16,17 : 1,2,0 D 1,2,1 A 1,2,2 A 59 | ## 18,19,20 : 2,0,0 D 2,0,1 C 2,0,2 D 60 | ## 21,22,23 : 2,1,0 D 2,1,1 A 2,1,2 A 61 | ## 24,25,26 : 2,2,0 D 2,2,1 D 2,2,2 C 62 | my @idxD = (1,2,5,6,8,11,15,18,20,21,24,25); 63 | my @idxC = (0,7,19,26); 64 | my ($trioconc,$triodisc) = (0,0); 65 | foreach my $i (@idxD) { $triodisc += $trios[$i]; } 66 | foreach my $i (@idxC) { $trioconc += $trios[$i]; } 67 | 68 | my $trioall = $trioconc + $triodisc; 69 | my $trionref = $trioall - $trios[0]; 70 | my $trionhom = $trionref - $trios[26]; 71 | my $triFilt = ( ($triodisc > 1 ) && ( $triodisc > 0.02*$trionhom) ) ? 1 : 0; 72 | my $misFilt = ( $fmis > 0.02 ) ? 
1 : 0; 73 | 74 | my @filts = split(/;/,$F[6]); 75 | my @newfilts = (); 76 | foreach my $f (@filts) { 77 | push(@newfilts,$f) if ( ( $f ne "PASS" ) && ( $f ne "DISC" ) ); 78 | } 79 | push(@newfilts,"DUP2") if ( $dupFilt == 1 ); 80 | push(@newfilts,"TRI2") if ( $triFilt == 1 ); 81 | push(@newfilts,"MIS2") if ( $misFilt == 1 ); 82 | push(@newfilts,"PASS") if ( $#newfilts < 0 ); 83 | 84 | $F[7] =~ s/;SVM=/;DUP_NH_ALL=$dupnhom;DUP_NH_DIS=$dupdisc;TRI_NH_ALL=$trionhom;TRI_NH_DIS=$triodisc;SVM=/; 85 | print OUT1 join("\t",@F[0..5],join(";",@newfilts),$F[7])."\n"; 86 | print OUT2 join("\t",@F[0..5],join(";",@newfilts),$F[7])."\n"; 87 | } 88 | } 89 | close OUT1; 90 | close OUT2; 91 | close MILK; 92 | close SVM; 93 | 94 | print `$ENV{'EXE_PREFIX'}/htslib/tabix -f -pvcf $outprefix.vcf.gz`; 95 | -------------------------------------------------------------------------------- /king/OptimizerConstraints.cpp: -------------------------------------------------------------------------------- 1 | #include "OptimizerConstraints.h" 2 | 3 | #include 4 | 5 | #define CONSTRAIN_NONE 0 6 | #define CONSTRAIN_MIN 1 7 | #define CONSTRAIN_MAX 2 8 | #define CONSTRAIN_RANGE 3 9 | 10 | void OptimizerInterface::Dimension(int parameters) 11 | { 12 | point.Dimension(parameters); 13 | min.Dimension(parameters); 14 | max.Dimension(parameters); 15 | 16 | constraints.Dimension(parameters); 17 | constraints.Zero(); 18 | } 19 | 20 | void OptimizerInterface::SetMin(int parameter, double value) 21 | { 22 | constraints[parameter] |= CONSTRAIN_MIN; 23 | min[parameter] = value; 24 | } 25 | 26 | void OptimizerInterface::SetMax(int parameter, double value) 27 | { 28 | constraints[parameter] |= CONSTRAIN_MAX; 29 | max[parameter] = value; 30 | } 31 | 32 | void OptimizerInterface::SetRange(int parameter, double MIN, double MAX) 33 | { 34 | constraints[parameter] = CONSTRAIN_RANGE; 35 | min[parameter] = MIN; 36 | max[parameter] = MAX; 37 | } 38 | 39 | void OptimizerInterface::Fix(int parameter, double value) 40 | { 
41 | constraints[parameter] = CONSTRAIN_RANGE; 42 | min[parameter] = max[parameter] = value; 43 | } 44 | 45 | void OptimizerInterface::ClearConstraints(int parameter) 46 | { 47 | constraints[parameter] = CONSTRAIN_NONE; 48 | } 49 | 50 | void OptimizerInterface::ClearConstraints() 51 | { 52 | constraints.Zero(); 53 | } 54 | 55 | void OptimizerInterface::SetObjectiveFunction(ObjectiveFunction & function) 56 | { 57 | f = &function; 58 | } 59 | 60 | double OptimizerInterface::Evaluate(Vector & vector) 61 | { 62 | Translate(vector, point); 63 | 64 | return f->Evaluate(point); 65 | } 66 | 67 | void OptimizerInterface::Translate(Vector & unconstrained, Vector & constrained) 68 | { 69 | constrained.Dimension(constraints.Length()); 70 | 71 | for (int i = 0, j = 0; i < constraints.Length(); i++) 72 | switch (constraints[i]) 73 | { 74 | case CONSTRAIN_NONE : 75 | constrained[i] = unconstrained[j++]; 76 | break; 77 | case CONSTRAIN_MIN : 78 | constrained[i] = min[i] + exp(unconstrained[j++]); 79 | break; 80 | case CONSTRAIN_MAX : 81 | constrained[i] = max[i] - exp(unconstrained[j++]); 82 | break; 83 | case CONSTRAIN_RANGE : 84 | if (min[i] == max[i]) 85 | constrained[i] = min[i]; 86 | else 87 | { 88 | double x = unconstrained[j++]; 89 | 90 | if (x >= 36) 91 | constrained[i] = max[i]; 92 | else 93 | constrained[i] = min[i] + (max[i] - min[i]) * exp(x) / (1 + exp(x)); 94 | } 95 | } 96 | } 97 | 98 | void OptimizerInterface::BackTranslate(Vector & constrained, Vector & unconstrained) 99 | { 100 | unconstrained.Dimension(constraints.Length()); 101 | 102 | int j = 0; 103 | for (int i = 0; i < constraints.Length(); i++) 104 | switch (constraints[i]) 105 | { 106 | case CONSTRAIN_NONE : 107 | unconstrained[j++] = constrained[i]; 108 | break; 109 | case CONSTRAIN_MIN : 110 | assert(constrained[i] >= min[i]); 111 | unconstrained[j++] = log(constrained[i] + min[i] + 1e-16); 112 | break; 113 | case CONSTRAIN_MAX : 114 | assert(constrained[i] <= max[i]); 115 | unconstrained[j++] = 
exp(max[i] - constrained[i] + 1e-16); 116 | break; 117 | case CONSTRAIN_RANGE : 118 | if (min[i] == max[i]) 119 | assert(constrained[i] == min[i]); 120 | else 121 | { 122 | assert(constrained[i] >= min[i]); 123 | assert(constrained[i] <= max[i]); 124 | 125 | double x = (constrained[i] - min[i]) / (max[i] - min[i]); 126 | 127 | if (x >= 0.999999999) x = 0.999999999; 128 | if (x <= 1e-16) x = 1e-16; 129 | 130 | unconstrained[j++] = log(x/(1-x)); 131 | } 132 | } 133 | 134 | unconstrained.Dimension(j); 135 | } 136 | 137 | int OptimizerInterface::CountParameters() 138 | { 139 | return constraints.Length(); 140 | } 141 | 142 | int OptimizerInterface::CountFreeParameters() 143 | { 144 | int parameters = constraints.Length(); 145 | 146 | for (int i = 0; i < constraints.Length(); i++) 147 | if (constraints[i] == CONSTRAIN_RANGE && min[i] == max[i]) 148 | parameters--; 149 | 150 | return parameters; 151 | } 152 | -------------------------------------------------------------------------------- /king/BrentC.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "analysis.h" 5 | 6 | /// Brent's method for minimizing a 1d function 7 | // Machine eps 8 | double Engine::MACHEPS = (double)2.2204460492503131e-016; 9 | double Engine::MACHEPS_SQRT = sqrt(MACHEPS); 10 | double Engine::cbrent = ((double)3.0 - sqrt((double)5.0)) / (double)2.0; 11 | 12 | /// Minimize the function over the interval [a, b] 13 | /// Function to minimize 14 | /// Left side of the bracket 15 | /// Right side of the bracket 16 | /// Stopping tolerance 17 | /// Function evaluated at the minimum 18 | /// number of function evaluations 19 | /// maximum number of function evaluations allowed 20 | /// print out function evaluations? 
21 | /// Point that minimizes the function 22 | /// This implements the algorithm from Brent's book, "Algorithms for Minimization without Derivatives" 23 | //double BrentC::minimize(BrentFunctor &f, double a, double b, double eps, double &funcx, size_t &numiter, size_t maxIter, bool quiet) 24 | double Engine::minimize(double a, double b, double eps, double &funcx, int &numiter, int maxIter, bool quiet) 25 | { 26 | if (a >= b){ 27 | printf("Exception: a must be < b"); 28 | throw(1); 29 | } 30 | 31 | double x = a + cbrent * (b - a); 32 | double v = x; 33 | double w = x; 34 | double e = 0; 35 | 36 | double fx = fLL(x); 37 | double fv = fx; 38 | double fw = fx; 39 | 40 | numiter = 0; 41 | 42 | while (true) 43 | { 44 | double m = (double)0.5 * (a + b); 45 | double tol = MACHEPS_SQRT * abs(x) + eps; 46 | double tol2 = (double)2 * tol; 47 | 48 | // Check the stopping criterion 49 | if (abs(x - m) <= tol2 - 0.5 * (b - a)){ break; } 50 | 51 | // Stop if we've exceeded the maximum number of iterations 52 | numiter++; 53 | if (numiter > maxIter){ 54 | printf("Exception: Exceeded maximum number of iterations."); 55 | throw(2); 56 | } 57 | double p = 0.0, q = 0.0, r = 0.0; 58 | double d = 0.0; 59 | double u = 0.0; 60 | 61 | if (abs(e) > tol) 62 | { 63 | // Fit parabola 64 | r = (x - w) * (fx - fv); 65 | q = (x - v) * (fx - fw); 66 | p = (x - v) * q - (x - w)*r; 67 | q = (double)2.0 * (q - r); 68 | if (q > (double)0.0) 69 | p = -p; 70 | else 71 | q = -q; 72 | r = e; 73 | e = d; 74 | } 75 | 76 | if ((abs(p) < abs((double)0.5*q*r)) && (p < q*(a-x)) && (p < q*(b-x))) 77 | { 78 | // Parabolic interpolation step 79 | d = p / q; 80 | u = x + d; 81 | // f must not be evaluated too close to a or b 82 | if (u - a < tol2 || b - u < tol2) 83 | d = (x < m) ? tol : -tol; 84 | } 85 | else 86 | { 87 | // Golden section step 88 | e = (x < m) ? 
b - x : a - x; 89 | d = cbrent * e; 90 | } 91 | 92 | // f must not be evaluated too close to x 93 | if (abs(d) >= tol) 94 | u = x + d; 95 | else if (d > 0.0) 96 | u = x + tol; 97 | else 98 | u = x - tol; 99 | double fu = fLL(u); 100 | 101 | // Update 102 | if (fu <= fx) 103 | { 104 | if (u < x) 105 | b = x; 106 | else 107 | a = x; 108 | v = w; fv = fw; 109 | w = x; fw = fx; 110 | x = u; fx = fu; 111 | } 112 | else 113 | { 114 | if (u < x) 115 | a = u; 116 | else 117 | b = u; 118 | 119 | if (fu <= fw || w == x) 120 | { 121 | v = w; fv = fw; 122 | w = u; fw = fu; 123 | } 124 | else if (fu <= fv || v == x || v == w) 125 | { 126 | v = u; fv = fu; 127 | } 128 | } 129 | 130 | if ( !quiet ){ 131 | printf("Iteration %d, min_x = %.4lf, f(min_x) = %.4lf\n", numiter, x, fx); 132 | 133 | /* 134 | #if defined( _MSC_VER ) // Windows/VC uses a %Iu specifier for size_t 135 | const char *szFmt1 = "Iteration %Iu, min_x = %f, f(min_x) = %f, "; 136 | #else // Linux/g++ uses a %zu specifier for size_t 137 | const char *szFmt1 = "Iteration %zu, min_x = %f, f(min_x) = %f, "; 138 | #endif 139 | printf( szFmt1, numiter, x, fx ); 140 | */ 141 | } 142 | } 143 | funcx = fLL(x); 144 | return x; 145 | } 146 | 147 | -------------------------------------------------------------------------------- /king/MathGenMin.h: -------------------------------------------------------------------------------- 1 | #ifndef __MATHPOWELL_H__ 2 | #define __MATHPOWELL_H__ 3 | 4 | #include "MathGold.h" 5 | #include "MathVector.h" 6 | #include "MathMatrix.h" 7 | #include "Random.h" 8 | 9 | // Multidimensional minimization of a continuous function 10 | // starting with a user supplied starting point and 11 | // direction vector 12 | // 13 | class GeneralMinimizer 14 | { 15 | public: 16 | VectorFunc * func; // Function to be minimized 17 | Matrix directions; 18 | Vector point; 19 | double fmin; 20 | 21 | // Setup matrices assuming ndim point 22 | virtual void Reset(int ndim, double scale = 1.0); 23 | 24 | // Find a 
minimum using direction set and starting point 25 | virtual double Minimize(double ftol = TOL) = 0; 26 | 27 | GeneralMinimizer(); 28 | virtual ~GeneralMinimizer() { } 29 | 30 | double f(Vector & v) 31 | { return func->Evaluate(v); } 32 | 33 | void df(Vector & v, Vector & d, double scale = 1.0) 34 | { func->Derivative(v, d, scale); } 35 | }; 36 | 37 | // Powell's conjugate direction method 38 | // After each round, the direction of largest decrease is replaces 39 | // its biggest component among the original directions 40 | // 41 | 42 | class PowellMinimizer : public GeneralMinimizer 43 | { 44 | public: 45 | int iter; 46 | 47 | virtual ~PowellMinimizer() { } 48 | virtual double Minimize(double ftol = TOL); 49 | }; 50 | 51 | 52 | // Simulated annealing using simplex method of Nelder and Mead 53 | // 54 | class SAMinimizer : public GeneralMinimizer 55 | { 56 | public: 57 | int iter; 58 | bool freeRand; 59 | Random * rand; 60 | 61 | Vector y; // evaluation of entropy at y 62 | Matrix simplex; // volume in n dimensions (n+1) points 63 | 64 | SAMinimizer(); 65 | SAMinimizer(Random & rand); 66 | 67 | virtual ~SAMinimizer(); 68 | 69 | virtual void Reset(int ndim, double scale = 1.0); 70 | 71 | // Lowers temperature T from maxT to minT in Tcycles linear decay cycles 72 | // Titer iterations at each temperature 73 | virtual double Minimize(double ftol = TOL); 74 | double MinimizeLoop(double ftol = TOL); 75 | 76 | double T, maxT, minT; // Temperature 77 | int Tcycles, Titer; // Cycling parameters 78 | 79 | private: 80 | Vector psum; 81 | Vector ptry; 82 | double yhi; 83 | 84 | void Constructor(); 85 | double Amoeba(int ihi, double factor); 86 | }; 87 | 88 | // Multidimensional minimization of a continuous function by 89 | // the down-hill simplex method of Nelder and Mead 90 | // (Computer Journal 1965) 91 | // 92 | class AmoebaMinimizer : public GeneralMinimizer 93 | { 94 | public: 95 | Matrix simplex; 96 | long cycleCount, cycleMax; // number of function evaluations 97 | 98 
| AmoebaMinimizer(); 99 | virtual ~AmoebaMinimizer() { } 100 | 101 | virtual void Reset(int dimensions, double scale = 1.0); 102 | virtual double Minimize(double ftol = TOL); 103 | 104 | private: 105 | Vector psum, ptry, y; 106 | 107 | double Amoeba(int ihi, double factor); 108 | }; 109 | 110 | // Differential Evolution minimizer 111 | // A stochastic minimizer based on the algorithm of Storn and Price, 1996 112 | 113 | class EvolutionaryMinimizer : public GeneralMinimizer 114 | { 115 | public: 116 | Matrix points; 117 | Vector y; 118 | 119 | double crossover; // This is the CR parameter of Storn and Price 120 | double step_size; // This is the L parameter of Storn and Price 121 | int multiples; // The NP paraemter of Storn and Price will be dimensions * multiple 122 | 123 | Random * rand; 124 | 125 | bool generate_random_points; 126 | 127 | int generations; 128 | int max_generations; 129 | 130 | EvolutionaryMinimizer(); 131 | EvolutionaryMinimizer(Random & randomSeries); 132 | 133 | ~EvolutionaryMinimizer() { } 134 | 135 | virtual void Reset(int dimensions, double scale = 1.0); 136 | virtual double Minimize(double ftol = TOL); 137 | 138 | private: 139 | void Init(Random & randomSeries); 140 | }; 141 | 142 | // Conjugate gradient minimizer 143 | // Polak-Ribiere improvement on Fletcher-Reeves algorithm for 144 | // multidimensional minimization. 
145 | // 146 | 147 | class FletcherMinimizer : public GeneralMinimizer 148 | { 149 | public: 150 | int iter; 151 | 152 | FletcherMinimizer() { } 153 | 154 | virtual void Reset(int dimensions, double scale = 1.0); 155 | virtual double Minimize(double ftol = TOL); 156 | 157 | private: 158 | Vector g, h; 159 | }; 160 | 161 | #endif 162 | 163 | 164 | -------------------------------------------------------------------------------- /king/MathNormal.h: -------------------------------------------------------------------------------- 1 | #ifndef __NORMALEQUATIONS_H__ 2 | #define __NORMALEQUATIONS_H__ 3 | 4 | #include "IntArray.h" 5 | #include "MathMatrix.h" 6 | #include "MathVector.h" 7 | #include "MathCholesky.h" 8 | 9 | #define NORMAL_AMOEBA_MIN 0 10 | #define NORMAL_POWELL_MIN 1 11 | #define NORMAL_FLETCHER_MIN 2 12 | 13 | class NormalEquations 14 | { 15 | public: 16 | Vector means, variances; 17 | 18 | Matrix * varComponents; 19 | Matrix linearModel; 20 | Vector scores; 21 | 22 | double likelihood; 23 | 24 | NormalEquations(); 25 | virtual ~NormalEquations(); 26 | 27 | void Dimension(int vcCount); 28 | 29 | virtual void Prepare(); 30 | virtual void SetParameters(Vector & means, Vector & variances); 31 | virtual double Evaluate(); 32 | 33 | Cholesky cholesky; 34 | Matrix varMatrix; 35 | Vector residuals; 36 | double constant; 37 | bool includeLikelihoodConstant; 38 | int multiple; 39 | 40 | bool operator == (const NormalEquations & rhs); 41 | 42 | void EnableConstant(); 43 | void DisableConstant(); 44 | 45 | // Diagnostic statistics 46 | // see JL Hopper and JD Matthews Ann Hum Genet (1992) 46:373 - 383 47 | double rawQ; // This is a chi-square with n degrees of freedom 48 | double Q; // This is Q1 and has a standard normal distribution 49 | Vector Qi; // Each Qi is approximately chi-square with 1 df 50 | void Diagnostics(); 51 | 52 | protected: 53 | void Free(); 54 | 55 | void CalculateResiduals(); 56 | void CalculateCovariances(); 57 | 58 | bool meanChange, varChange, 
init; 59 | IntArray meanFlags; 60 | }; 61 | 62 | class NormalSet 63 | { 64 | public: 65 | NormalEquations ** sets; 66 | Vector weights; 67 | IntArray operators; 68 | 69 | double precision; 70 | int numericMinimizer; 71 | int size; 72 | int count; 73 | int maxThreads; 74 | double likelihood; 75 | Vector variances, means; 76 | 77 | // Number of function evaluations 78 | int evaluations; 79 | 80 | NormalSet(int threads = 0); 81 | 82 | virtual ~NormalSet() { Free(); } 83 | 84 | void Dimension(int setCount, int vcCount, int vcDerived = 0); 85 | double Evaluate(); 86 | void SelectPoint(Vector & v); 87 | void Solve(); 88 | int CountObservations(); 89 | virtual int CountParameters(); 90 | 91 | NormalEquations & operator [] (int n) 92 | { return *(sets[n]); } 93 | 94 | void EnableConstant(); 95 | void DisableConstant(); 96 | 97 | // Vector for storing intermediate likelihoods 98 | Vector recordedLikelihoods; 99 | 100 | // This function should be over-ridden to calculate constrained variance 101 | // components appropriately 102 | virtual void CalculateConstrainedVariances(); 103 | 104 | protected: 105 | // for multi-threading 106 | static void * EvaluateOneSet(void * which); 107 | 108 | // house-keeping 109 | void Free(); 110 | virtual void AllocateSets(); 111 | 112 | // Helpers for solver 113 | void EditLinearDegenerates(); 114 | virtual void GetStartingPoint(Vector & startPoint); 115 | void RemoveRedundancy(); 116 | 117 | // Intermediate results when calculating likelihoods 118 | Vector logLikelihoods; 119 | 120 | // How many variance components should be estimated? 121 | int vcEstimated; 122 | 123 | // And how many variance components are constrained by the other parameters? 
124 | int vcConstrained; 125 | }; 126 | 127 | class NonLinearNormalSet : public NormalSet 128 | { 129 | public: 130 | virtual void CalculateConstrainedVariances(); 131 | 132 | IntArray nonLinearVariances; 133 | IntArray component1, component2; 134 | }; 135 | 136 | class NormalSolver : public VectorFunc 137 | { 138 | public: 139 | NormalSet * normal; 140 | 141 | NormalSolver(NormalSet * n) : VectorFunc() 142 | { normal = n; } 143 | 144 | virtual ~NormalSolver() { } 145 | 146 | virtual double Evaluate(Vector & point); 147 | }; 148 | 149 | // Constants for setting elements of operations array 150 | // Which tell normal set class how to combine partial likelihoods 151 | // 152 | 153 | #define NORMAL_OP_MASK 7 154 | #define NORMAL_NOP 0 155 | #define NORMAL_MUL_LK 1 156 | #define NORMAL_SCALE_LLK 2 157 | #define NORMAL_SUM_LK 3 158 | #define NORMAL_DIV_LK 4 159 | #define NORMAL_POP 5 160 | #define NORMAL_RECORD_LLK 6 161 | 162 | #define NORMAL_OP(a,b,c,d) ((a) | ((b) << 3) | ((c) << 6) | ((d) << 9)) 163 | #define NORMAL_LAST_OP(a) ((a) << 12) 164 | 165 | #endif 166 | 167 | 168 | --------------------------------------------------------------------------------