├── CONVERTF ├── example.pheno ├── example.bed ├── example.eigenstratgeno ├── example.packedancestrymap ├── example.packedancestrymapgeno ├── example.pedind ├── example.ind ├── example.ind.packedancestrymap ├── par.EIGENSTRAT.PED ├── par.ANCESTRYMAP.EIGENSTRAT ├── par.PACKEDANCESTRYMAP.ANCESTRYMAP ├── example.map ├── par.PED.PACKEDPED ├── example.ped ├── par.PED.EIGENSTRAT ├── example.pedsnp ├── par.PACKEDPED.PACKEDANCESTRYMAP ├── example.perl ├── example.snp ├── example.snp.packedancestrymap ├── ind2pheno.perl └── example.ancestrymapgeno ├── POPGEN ├── HGDP.X.perl ├── example.geno ├── grmjunk.id ├── lsqproject.pdf ├── example.plot.pdf ├── example.eval ├── example.pca ├── example.ind ├── twexample.perl ├── example.plot2.xtxt ├── example.plot.xtxt ├── grmjunk ├── par.example ├── example.evec ├── example.snp ├── example.perl ├── example.log ├── twexample.eval ├── smartpca.info └── twtable ├── EIGENSTRAT ├── example.pheno ├── example.geno ├── example.plot.pdf ├── example.eval ├── example.pca ├── example.chisq ├── example.QTL.chisq ├── example.ind ├── example.QTL.chisq.GC ├── example.chisq.GC ├── example.QTL.ind ├── example.log ├── example.chisq.par ├── example.QTL.chisq.par ├── example.plot.xtxt ├── example.pca.par ├── example.pca.evec ├── example.snp ├── example.oldstyle.perl ├── example.perl └── example.QTL.perl ├── src ├── .gitignore ├── eigensrc │ ├── .gitignore │ ├── pcatoy.c │ ├── smartsubs.c │ ├── exclude.c │ ├── twstats.c │ ├── eigsubs.c │ └── eigx.c ├── ksrc │ ├── Makefile │ ├── kjg_fpca.c │ └── kjg_gsl.c ├── nicksrc │ ├── Makefile │ ├── gauss.c │ ├── qqq.c │ ├── LICENSE.txt │ ├── xsearch.c │ ├── twtable.c │ └── sortit.c ├── gval.h ├── README ├── LICENSE.txt ├── h2d.c ├── Makefile ├── egsubs.c ├── qmakef ├── gval.c ├── twsubs.c ├── smarttables │ └── twtable └── baseprog.c ├── include ├── globals.h ├── packit.h ├── smartsubs.h ├── nicklib.h ├── gval.h ├── egsubs.h ├── eigsubs.h ├── xsearch.h ├── badpairs.h ├── exclude.h ├── workqueue.h ├── ldsubs.h ├── sortit.h ├── getpars.h ├── regsubs.h ├── mcmcpars.h ├── xpsubs.h ├── not-thread-h ├── kjg_fpca.h ├── linsubs.h ├── ranmath.h ├── kjg_gsl.h ├── strsubs.h ├── statsubs.h ├── twtable.h ├── admutils.h └── mcio.h ├── bin ├── .gitignore ├── evec2pca.perl ├── gc.perl ├── smarteigenstrat.perl ├── evec2pca-ped.perl ├── smartpca.perl └── ploteig ├── .gitignore ├── LICENSE.txt └── README /CONVERTF/example.pheno: -------------------------------------------------------------------------------- 1 | 11000 2 | -------------------------------------------------------------------------------- /POPGEN/HGDP.X.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin 2 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.pheno: -------------------------------------------------------------------------------- 1 | 11000 2 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | baseprog 2 | convertf 3 | mergeit 4 | pca 5 | smshrink 6 | -------------------------------------------------------------------------------- /CONVERTF/example.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/EIG/master/CONVERTF/example.bed -------------------------------------------------------------------------------- /EIGENSTRAT/example.geno: -------------------------------------------------------------------------------- 1 | 11100 2 | 01212 3 | 21101 4 | 00122 5 | 21100 6 | 00111 7 | 22110 8 | -------------------------------------------------------------------------------- /POPGEN/example.geno: -------------------------------------------------------------------------------- 1 | 11100 2 | 01212 3 | 21101 4 | 00122 5 | 21100 6 | 00111 7 | 22110 8 | -------------------------------------------------------------------------------- /POPGEN/grmjunk.id: -------------------------------------------------------------------------------- 1 | NA SAMPLE0 2 | NA SAMPLE1 3 | NA SAMPLE2 4 | NA SAMPLE3 5 | NA SAMPLE4 6 | -------------------------------------------------------------------------------- /POPGEN/lsqproject.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/EIG/master/POPGEN/lsqproject.pdf -------------------------------------------------------------------------------- /POPGEN/example.plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/EIG/master/POPGEN/example.plot.pdf -------------------------------------------------------------------------------- /CONVERTF/example.eigenstratgeno: -------------------------------------------------------------------------------- 1 | 11100 2 | 01212 3 | 21101 4 | 00122 5 | 21100 6 | 00111 7 | 22110 8 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/EIG/master/EIGENSTRAT/example.plot.pdf -------------------------------------------------------------------------------- /EIGENSTRAT/example.eval: -------------------------------------------------------------------------------- 1 | 3.145365 2 | 0.479177 3 | 0.279889 4 | 0.095570 5 | -0.000000 6 | -------------------------------------------------------------------------------- /POPGEN/example.eval: -------------------------------------------------------------------------------- 1 | 3.145365 2 | 0.479177 3 | 0.279889 4 | 0.095570 5 | -0.000000 6 | -------------------------------------------------------------------------------- /CONVERTF/example.packedancestrymap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/EIG/master/CONVERTF/example.packedancestrymap -------------------------------------------------------------------------------- /CONVERTF/example.packedancestrymapgeno: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nevrome/EIG/master/CONVERTF/example.packedancestrymapgeno -------------------------------------------------------------------------------- /src/eigensrc/.gitignore: -------------------------------------------------------------------------------- 1 | eigenstrat 2 | eigenstratQTL 3 | pcatoy 4 | smarteigenstrat 5 | smartpca 6 | smartrel 7 | twstats 8 | -------------------------------------------------------------------------------- /POPGEN/example.pca: -------------------------------------------------------------------------------- 1 | 2 2 | 3.1450 3 | 0.4790 4 | 0.6502 0.0875 5 | 0.3589 0.0804 6 | -0.0781 -0.6017 7 | -0.3982 0.7316 8 | -0.5328 -0.2977 9 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.pca: -------------------------------------------------------------------------------- 1 | 2 2 | 3.1450 3 | 0.4790 4 | 0.6502 0.0875 5 | 0.3589 0.0804 6 | -0.0781 -0.6017 7 | -0.3982 0.7316 8 | -0.5328 -0.2977 9 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.chisq: -------------------------------------------------------------------------------- 1 | Chisq EIGENSTRAT 2 | 2.2222 0.9615 3 | 2.9167 0.2219 4 | 2.0833 0.0612 5 | 4.1667 0.1258 6 | 2.9167 2.0997 7 | 5.0000 3.0000 8 | 3.8095 0.0764 9 | -------------------------------------------------------------------------------- /include/globals.h: -------------------------------------------------------------------------------- 1 | #ifndef _GLOBALS_ 2 | #define _GLOBALS_ 3 | 4 | int numchrom = 22; 5 | int fancynorm = YES, verbose = NO, plotmode = NO, outnum = -1; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /include/packit.h: -------------------------------------------------------------------------------- 1 | int packmode = NO; 2 | unsigned char *packgenos = NULL; 3 | long packlen = 0; 4 | long rlen = -1; 5 | int rdismode = NO; 6 | unsigned char *packepath; 7 | -------------------------------------------------------------------------------- /include/smartsubs.h: -------------------------------------------------------------------------------- 1 | void setoutliermode (int mode); 2 | int 3 | ridoutlier (double *evecs, int n, int neigs, 4 | double thresh, int *badlist, OUTLINFO ** outinfo); 5 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.chisq: -------------------------------------------------------------------------------- 1 | Chisq EIGENSTRAT 2 | 2.2222 0.9615 3 | 2.9167 0.2219 4 | 2.0833 0.0612 5 | 4.1667 0.1258 6 | 2.9167 2.0997 7 | 5.0000 3.0000 8 | 3.8095 0.0764 9 | -------------------------------------------------------------------------------- /CONVERTF/example.pedind: -------------------------------------------------------------------------------- 1 | 1 SAMPLE0 0 0 2 2 2 | 2 SAMPLE1 0 0 1 2 3 | 3 SAMPLE2 0 0 2 1 4 | 4 SAMPLE3 0 0 1 1 5 | 5 SAMPLE4 0 0 2 1 6 | -------------------------------------------------------------------------------- /bin/.gitignore: -------------------------------------------------------------------------------- 1 | baseprog 2 | convertf 3 | eigenstrat 4 | eigenstratQTL 5 | mergeit 6 | pca 7 | pcatoy 8 | smarteigenstrat 9 | smartpca 10 | smartrel 11 | smshrink 12 | twstats 13 | -------------------------------------------------------------------------------- /include/nicklib.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | -------------------------------------------------------------------------------- /CONVERTF/example.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /POPGEN/example.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.chisq.GC: -------------------------------------------------------------------------------- 1 | Chisq EIGENSTRAT 2 | lambda=6.396 lambda=1.000 3 | 0.3475 0.9615 4 | 0.4560 0.2219 5 | 0.3257 0.0612 6 | 0.6515 0.1258 7 | 0.4560 2.0997 8 | 0.7818 3.0000 9 | 0.5956 0.0764 10 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.chisq.GC: -------------------------------------------------------------------------------- 1 | Chisq EIGENSTRAT 2 | lambda=6.396 lambda=1.000 3 | 0.3475 0.9615 4 | 0.4560 0.2219 5 | 0.3257 0.0612 6 | 0.6515 0.1258 7 | 0.4560 2.0997 8 | 0.7818 3.0000 9 | 0.5956 0.0764 10 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F 0.999999 2 | SAMPLE1 M 1.000001 3 | SAMPLE2 F -0.000001 4 | SAMPLE3 M 0.000001 5 | SAMPLE4 F -0.000001 6 | -------------------------------------------------------------------------------- /CONVERTF/example.ind.packedancestrymap: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.log: -------------------------------------------------------------------------------- 1 | parameter file: example.chisq.par 2 | genotypename: example.geno 3 | snpname: example.snp 4 | indivname: example.ind 5 | pcaname: example.pca 6 | outputname: example.chisq 7 | numpc: 1 8 | qtmode: NO 9 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.chisq.par: -------------------------------------------------------------------------------- 1 | genotypename: example.geno 2 | snpname: example.snp 3 | indivname: example.ind 4 | pcaname: example.pca 5 | outputname: example.chisq 6 | numpc: 1 7 | qtmode: NO 8 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.chisq.par: -------------------------------------------------------------------------------- 1 | genotypename: example.geno 2 | snpname: example.snp 3 | indivname: example.QTL.ind 4 | pcaname: example.pca 5 | outputname: example.QTL.chisq 6 | numpc: 1 7 | qtmode: YES 8 | -------------------------------------------------------------------------------- /POPGEN/twexample.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $command = "../bin/twstats"; 4 | $command .= " -t twtable "; 5 | $command .= " -i twexample.eval "; 6 | $command .= " -o twexample.out"; 7 | print("$command\n"); 8 | system("$command"); 9 | 10 | -------------------------------------------------------------------------------- /CONVERTF/par.EIGENSTRAT.PED: -------------------------------------------------------------------------------- 1 | genotypename: example.eigenstratgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: PED 5 | genotypeoutname: example.ped 6 | snpoutname: example.pedsnp 7 | indivoutname: example.pedind 8 | -------------------------------------------------------------------------------- /include/gval.h: -------------------------------------------------------------------------------- 1 | void setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs, 2 | int *xindex, int *xtypes, int ncols); 3 | void unsetgval (); 4 | int getgval (int row, int col, double *val); 5 | int getggval (int indindx, int col, double *val); 6 | -------------------------------------------------------------------------------- /POPGEN/example.plot2.xtxt: -------------------------------------------------------------------------------- 1 | set terminal postscript color 2 | set title "" 3 | set xlabel "eigenvector 1" 4 | set ylabel "eigenvector 2" 5 | plot "example.evec.1:2:Case" using 2:3 title "Case" , \ 6 | "example.evec.1:2:Control" using 2:3 title "Control" 7 | ## pause 9999 8 | -------------------------------------------------------------------------------- /CONVERTF/par.ANCESTRYMAP.EIGENSTRAT: -------------------------------------------------------------------------------- 1 | genotypename: example.ancestrymapgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: EIGENSTRAT 5 | genotypeoutname: example.eigenstratgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.plot.xtxt: -------------------------------------------------------------------------------- 1 | set terminal postscript color 2 | set title "" 3 | set xlabel "eigenvector 1" 4 | set ylabel "eigenvector 2" 5 | plot "example.pca.evec.1:2:Case" using 2:3 title "Case" , \ 6 | "example.pca.evec.1:2:Control" using 2:3 title "Control" 7 | ## pause 9999 8 | -------------------------------------------------------------------------------- /CONVERTF/par.PACKEDANCESTRYMAP.ANCESTRYMAP: -------------------------------------------------------------------------------- 1 | genotypename: example.packedancestrymapgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: ANCESTRYMAP 5 | genotypeoutname: example.ancestrymapgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /POPGEN/example.plot.xtxt: -------------------------------------------------------------------------------- 1 | set terminal postscript color 2 | set title "" 3 | set key outside 4 | set xlabel "eigenvector 1" 5 | set ylabel "eigenvector 2" 6 | plot "example.evec.1:2:Case" using 2:3 title "Case" , \ 7 | "example.evec.1:2:Control" using 2:3 title "Control" 8 | ## pause 9999 9 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.pca.par: -------------------------------------------------------------------------------- 1 | genotypename: example.geno 2 | snpname: example.snp 3 | indivname: example.ind 4 | evecoutname: example.pca.evec 5 | evaloutname: example.eval 6 | altnormstyle: NO 7 | numoutevec: 2 8 | numoutlieriter: 5 9 | numoutlierevec: 2 10 | outliersigmathresh: 6.0 11 | qtmode: 0 12 | -------------------------------------------------------------------------------- /POPGEN/grmjunk: -------------------------------------------------------------------------------- 1 | 1 1 7 1.789380 2 | 2 1 7 0.789216 3 | 2 2 7 0.670580 4 | 3 1 7 -0.286950 5 | 3 2 7 -0.108997 6 | 3 3 7 0.317350 7 | 4 1 7 -0.993409 8 | 4 2 7 -0.527105 9 | 4 3 7 -0.108997 10 | 4 4 7 0.958931 11 | 5 1 7 -1.298237 12 | 5 2 7 -0.823694 13 | 5 3 7 0.187593 14 | 5 4 7 0.670580 15 | 5 5 7 1.263758 16 | -------------------------------------------------------------------------------- /POPGEN/par.example: -------------------------------------------------------------------------------- 1 | genotypename: ../CONVERTF/example.ped 2 | snpname: ../CONVERTF/example.map 3 | indivname: ../CONVERTF/example.ped 4 | evecoutname: example.evec 5 | evaloutname: example.eval 6 | altnormstyle: NO 7 | numoutevec: 2 8 | familynames: NO 9 | grmoutname: grmjunk 10 | -------------------------------------------------------------------------------- /src/ksrc/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS += -I../../include -I/opt/openblas/include 2 | 3 | ifeq ($(DEBUG), 1) 4 | CFLAGS += -g # enable debugging 5 | endif 6 | 7 | ifeq ($(PROFILING), 1) 8 | CFLAGS += -pg # enable profiling 9 | endif 10 | 11 | .PHONY: all clean 12 | 13 | all: kjg_gsl.o kjg_fpca.o 14 | 15 | clean: 16 | rm *.o 17 | -------------------------------------------------------------------------------- /CONVERTF/example.map: -------------------------------------------------------------------------------- 1 | 11 rs0000 0.000000 0 2 | 11 rs1111 0.001000 100000 3 | 11 rs2222 0.002000 200000 4 | 11 rs3333 0.003000 300000 5 | 11 rs4444 0.004000 400000 6 | 11 rs5555 0.005000 500000 7 | 11 rs6666 0.006000 600000 8 | -------------------------------------------------------------------------------- /CONVERTF/par.PED.PACKEDPED: -------------------------------------------------------------------------------- 1 | genotypename: example.ped 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or example.ped, either works 4 | outputformat: PACKEDPED 5 | genotypeoutname: example.bed 6 | snpoutname: example.pedsnp 7 | indivoutname: example.pedind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /CONVERTF/example.ped: -------------------------------------------------------------------------------- 1 | 1 SAMPLE0 0 0 2 2 1 2 3 3 1 1 1 1 3 3 1 1 3 3 2 | 2 SAMPLE1 0 0 1 2 1 2 1 3 1 4 1 1 1 3 1 1 3 3 3 | 3 SAMPLE2 0 0 2 1 1 2 1 1 1 4 1 2 1 3 1 4 3 4 4 | 4 SAMPLE3 0 0 1 1 2 2 1 3 4 4 2 2 1 1 1 4 3 4 5 | 5 SAMPLE4 0 0 2 1 2 2 1 1 1 4 2 2 1 1 1 4 4 4 6 | -------------------------------------------------------------------------------- /CONVERTF/par.PED.EIGENSTRAT: -------------------------------------------------------------------------------- 1 | genotypename: example.ped 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or example.ped, either works 4 | outputformat: EIGENSTRAT 5 | genotypeoutname: example.eigenstratgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /CONVERTF/example.pedsnp: -------------------------------------------------------------------------------- 1 | 11 rs0000 0.000000 0 A C 2 | 11 rs1111 0.001000 100000 A G 3 | 11 rs2222 0.002000 200000 A T 4 | 11 rs3333 0.003000 300000 C A 5 | 11 rs4444 0.004000 400000 G A 6 | 11 rs5555 0.005000 500000 T A 7 | 11 rs6666 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /CONVERTF/par.PACKEDPED.PACKEDANCESTRYMAP: -------------------------------------------------------------------------------- 1 | genotypename: example.bed 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or example.ped, either works 4 | outputformat: PACKEDANCESTRYMAP 5 | genotypeoutname: example.packedancestrymapgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /POPGEN/example.evec: -------------------------------------------------------------------------------- 1 | #eigvals: 3.145 0.479 2 | SAMPLE0 0.6502 0.0875 Case 3 | SAMPLE1 0.3589 0.0804 Case 4 | SAMPLE2 -0.0781 -0.6017 Control 5 | SAMPLE3 -0.3982 0.7316 Control 6 | SAMPLE4 -0.5328 -0.2977 Control 7 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.pca.evec: -------------------------------------------------------------------------------- 1 | #eigvals: 3.145 0.479 2 | SAMPLE0 0.6502 0.0875 Case 3 | SAMPLE1 0.3589 0.0804 Case 4 | SAMPLE2 -0.0781 -0.6017 Control 5 | SAMPLE3 -0.3982 0.7316 Control 6 | SAMPLE4 -0.5328 -0.2977 Control 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | -------------------------------------------------------------------------------- /CONVERTF/example.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # $parfile = "par.ANCESTRYMAP.EIGENSTRAT"; 4 | $parfile = "par.EIGENSTRAT.PED"; 5 | # $parfile = "par.PED.EIGENSTRAT"; 6 | # $parfile = "par.PED.PACKEDPED"; 7 | # $parfile = "par.PACKEDPED.PACKEDANCESTRYMAP"; 8 | # $parfile = "par.PACKEDANCESTRYMAP.ANCESTRYMAP"; 9 | 10 | system("../bin/convertf -p $parfile"); 11 | -------------------------------------------------------------------------------- /POPGEN/example.snp: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /CONVERTF/example.snp: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.snp: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /include/egsubs.h: -------------------------------------------------------------------------------- 1 | #include "admutils.h" 2 | 3 | 4 | int makeeglist (char **eglist, int maxnumeg, Indiv ** indivmarkers, 5 | int numindivs); 6 | int mkeglist (Indiv ** indm, int numindivs, char **eglist); 7 | void seteglist (Indiv ** indm, int nindiv, char *eglistname); 8 | void seteglistv (Indiv ** indm, int nindiv, char *eglistname, int val); 9 | int loadlist (char **list, char *listname); 10 | int loadlist_type (char **list, char *listname, int *ztypes, int off); 11 | -------------------------------------------------------------------------------- /CONVERTF/example.snp.packedancestrymap: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /include/eigsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void eigvals (double *mat, double *evals, int n); 7 | void eigvecs (double *mat, double *evals, double *evecs, int n); 8 | void eigb (double *lam, double *a, double *b, int n); 9 | void eigc (double *lam, double *a, double *b, int n); 10 | double twestxx (double *lam, int m, double *pzn, double *pzvar); 11 | 12 | typedef struct 13 | { 14 | int vecno; 15 | double score; 16 | } OUTLINFO;; 17 | -------------------------------------------------------------------------------- /CONVERTF/ind2pheno.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $in = $ARGV[0]; # .ind file 4 | $out = $ARGV[1]; # .pheno file 5 | 6 | open(IN,$in) || die("COF"); 7 | open(OUT,">$out") || die("COF"); 8 | 9 | while($line = ) 10 | { 11 | if($line =~ /Case/) { print OUT ("1"); $case=1; } 12 | elsif($line =~ /Control/) { print OUT ("0"); $control=1; } 13 | else { print OUT ("9"); } 14 | } 15 | print OUT ("\n"); 16 | unless($case) { print("WARNING: no cases\n"); } 17 | unless($control) { print("WARNING: no controls\n"); } 18 | 19 | -------------------------------------------------------------------------------- /src/nicksrc/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -I../../include -D_GNU_SOURCE 2 | 3 | ifeq ($(DEBUG), 1) 4 | CFLAGS += -g # enable debugging 5 | endif 6 | 7 | ifeq ($(PROFILING), 1) 8 | CFLAGS += -pg # enable profiling 9 | endif 10 | 11 | NLIB = libnick.a 12 | NLIBOBJS = gauss.o gds.o getpars.o linsubs.o sortit.o statsubs.o strsubs.o vsubs.o xsearch.o twtable.o 13 | 14 | .PHONY: all clean 15 | 16 | all: $(NLIB) 17 | 18 | clean: 19 | rm *.a *.o 20 | 21 | # ----- build nicksrc/libnick.a 22 | $(NLIB): $(NLIBOBJS) 23 | ar -r $@ $^ 24 | -------------------------------------------------------------------------------- /src/gval.h: -------------------------------------------------------------------------------- 1 | void setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs, 2 | int *xindex, int *xtypes, int ncols); 3 | void unsetgval (); 4 | int getgval (int row, int col, double *val); 5 | int getggval (int indindx, int col, double *val); 6 | 7 | void set_ind_mask (); 8 | 9 | size_t get_nrows (); 10 | size_t get_ncols (); 11 | 12 | void kjg_geno_get_normalized_row (const size_t snp_index, double *y); 13 | size_t kjg_geno_get_normalized_rows (const size_t i, const size_t r, 14 | double *Y); 15 | -------------------------------------------------------------------------------- /include/xsearch.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int xfindit(char *ss) ; 4 | int xloadsearchx(char **ss, int n) ; 5 | int finddup(char **ss, int n) ; 6 | void xloadsearch(char **ss, int n) ; 7 | void xdestroy() ; 8 | 9 | void xhcreate (int n) ; 10 | void xhdestroy() ; 11 | ENTRY *xhsearch(ENTRY item, ACTION act) ; 12 | 13 | int xlookup(char *key, ACTION act) ; 14 | int xhash (char *key) ; 15 | int xhash1(int ww) ; 16 | int xhash2 (int x) ; 17 | int xcshift(int x, int shft) ; 18 | int stringhash(char *key) ; 19 | 20 | -------------------------------------------------------------------------------- /include/badpairs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "admutils.h" 3 | 4 | void dobadpairs (char *badpairsname, SNP ** snpm, int numsnps); 5 | void dogood (char *goodsnpname, SNP ** snpm, int numsnps); 6 | void getsnpsc (char *snpscname, SNP ** snpm, int numsnps); 7 | int killsnps (Indiv ** indivmarkers, SNP ** snpmarkers, int numsnps, 8 | int mincasenum); 9 | void loadbadpsc (SNP ** snpm, int numsnps, int rmode, char *gname); 10 | 11 | double entrop (double *a, int n); 12 | double xxlog2 (double t); 13 | double mutx (double *dd); 14 | double mutxx (double *dd, int m, int n); 15 | -------------------------------------------------------------------------------- /POPGEN/example.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 4 | 5 | $command = "smartpca"; 6 | $command .= " -p par.example >example.log"; 7 | print("$command\n"); 8 | system("$command"); 9 | 10 | $command = "ploteig"; 11 | $command .= " -i example.evec "; 12 | $command .= " -c 1:2 "; 13 | $command .= " -p Case:Control "; 14 | $command .= " -x "; 15 | $command .= " -o example.plot.xtxt "; # must end in .xtxt 16 | print("$command\n"); 17 | system("$command"); 18 | 19 | $command = "evec2pca.perl 2 example.evec example.ind example.pca"; 20 | print("$command\n"); 21 | system("$command"); 22 | -------------------------------------------------------------------------------- /include/exclude.h: -------------------------------------------------------------------------------- 1 | #ifndef _EXCLUDE_ 2 | #define _EXCLUDE_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | /* file name parameter : xregionname */ 9 | /* HW filter parameter : nhwfilter (-1 means no-filter) */ 10 | /* maximum number of regions : 1000 11 | closed intervals in physical position include endpoints */ 12 | /* read file and set ignore flag for SNPs */ 13 | void excluderegions (char *xregionname, SNP ** snps, int nsnps, 14 | char *deletesnpoutname); 15 | void hwfilter (SNP ** snps, int nsnps, int nindiv, double nhwfilter, 16 | char *deletesnpoutname); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/README: -------------------------------------------------------------------------------- 1 | Partial list of Authors of eigensoft 2 | 3 | Nick Patterson 4 | Alkes Price 5 | Sam Pollack 6 | Sasha Gusev 7 | Chris Chang 8 | Kevin Galinsky 9 | 10 | ======================================================== 11 | fastmode: YES 12 | optional parameters 13 | fastdim: (default 2 * numeigs) 14 | fastiter: (default numeigs) 15 | 16 | In fastmode 17 | A .evec and .eval file wiil be produced on request but: 18 | 19 | NO outlier removal 20 | NO Tracy-Widom significance testing 21 | NO lsqproject mode 22 | NO F_st. (Use fstonly: YES) if this is wanted. 23 | 24 | If fastmode and easymode set, NO projection 25 | 26 | -------------------------------------------------------------------------------- /src/nicksrc/gauss.c: -------------------------------------------------------------------------------- 1 | #include "ranmath.h" 2 | 3 | double 4 | gauss () 5 | { 6 | 7 | /** 8 | Numer alg. in C pp 289 ff 9 | */ 10 | 11 | static int iset = 0; 12 | static double gset; 13 | double v1, v2, rsq, fac; 14 | 15 | if (iset == 1) { 16 | iset = 0; 17 | return gset; 18 | } 19 | 20 | do { 21 | v1 = 2.0 * DRAND2 () - 1.0; 22 | v2 = 2.0 * DRAND2 () - 1.0; 23 | rsq = v1 * v1 + v2 * v2; 24 | } while (rsq >= 1.0 || rsq == 0.0); 25 | 26 | fac = sqrt (-2.0 * log (rsq) / rsq); 27 | gset = v1 * fac; 28 | iset = 1; 29 | return v2 * fac; 30 | 31 | } 32 | 33 | void 34 | gaussa (double *a, int n) 35 | { 36 | int i; 37 | for (i = 0; i < n; i++) 38 | a[i] = gauss (); 39 | 40 | } 41 | -------------------------------------------------------------------------------- /include/workqueue.h: -------------------------------------------------------------------------------- 1 | #ifndef WORKQUEUE_H 2 | #define WORKQUEUE_H 3 | 4 | #ifdef HAVE_PTHREAD 5 | #include 6 | #endif 7 | 8 | typedef struct work_task_t 9 | { 10 | #ifdef HAVE_PTHREAD 11 | pthread_t thread; 12 | #endif 13 | void *(*start_routine) (void *); 14 | void *argument; 15 | } work_task; 16 | 17 | typedef struct work_queue_t 18 | { 19 | // Which tasks are pending? 20 | work_task *tasks; 21 | int num_tasks; 22 | } work_queue; 23 | 24 | void create_work_queue (work_queue ** the_queue); 25 | void destroy_work_queue (work_queue ** the_queue); 26 | void queue_task (work_queue * queue, const work_task * task); 27 | void wait_for_queue_to_complete (const work_queue * queue); 28 | 29 | #endif // WORKQUEUE_H 30 | -------------------------------------------------------------------------------- /include/ldsubs.h: -------------------------------------------------------------------------------- 1 | 2 | typedef struct 3 | { 4 | double S0; 5 | double S1; 6 | double S2; 7 | double S11; 8 | double S12; 9 | double S22; 10 | double m1; 11 | double m2; 12 | double v11; 13 | double v12; 14 | double v22; 15 | double corr; 16 | double Z; 17 | } CORR; 18 | 19 | int calccorr (CORR * corrpt, int mode, int ztrans); 20 | void printcorr (CORR * corrpt); 21 | void clearcorr (CORR * corrpt); 22 | void addcorr (CORR * corrpt, double x1, double x2); 23 | void addcorrn (CORR * corrpt, double x1, double x2, double yn); 24 | void minuscorr (CORR * out, CORR * c1, CORR * c2); 25 | 26 | double lddip (double *xc); 27 | double zdip (double *xc); 28 | void setzdipmode (int mode); 29 | void setzdphasedmode (int mode); 30 | -------------------------------------------------------------------------------- /include/sortit.h: -------------------------------------------------------------------------------- 1 | 2 | void sortit(double *a, int *ind, int len) ; 3 | double median (double *a, int n) ; 4 | int compit (int *a1, int *a2) ; 5 | void isortit(int *a, int *ind, int len) ; 6 | void lsortit(long *a, int *ind, int len) ; 7 | int icompit (int *a1, int *a2) ; 8 | int lcompit (int *a1, int *a2) ; 9 | void invperm(int *a, int *b, int n) ; 10 | int ipcompit (int *a1, int *a2) ; 11 | int comparr(double *a, double *b, int len) ; 12 | int compiarr(int *a, int *b, int len) ; 13 | void ipsortit(int **a, int *ind, int len, int rlen) ; 14 | void ipsortitp(int **a, int *ind, int len, int rlen, int *pp) ; 15 | void setorder (int *pp, int rlen) ; 16 | void mkirank(int *rank, int *xin, int n) ; 17 | void mkrank(int *rank, double *xin, int n) ; 18 | 19 | -------------------------------------------------------------------------------- /include/getpars.h: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | int numpars ; 3 | FILE *fx ; 4 | char **ppars ; 5 | char **pdata ; 6 | } phandle ; 7 | 8 | void writepars(phandle *pp) ; 9 | void closepars(phandle *pp) ; 10 | phandle *openpars(char *fname) ; 11 | 12 | int getstring(phandle *pp, char *parname, char **kret) ; 13 | int getint(phandle *pp, char *parname, int *kret) ; 14 | int getints(phandle *pp, char *parname, int *aint, int nint) ; 15 | int getintss(phandle *pp, char *parname, int *aint, int *xint) ; 16 | 17 | int getdbl(phandle *pp, char *parname, double *dbl) ; 18 | int getdbls(phandle *pp, char *parname, double *dbl, int ndbl) ; 19 | int getdblss(phandle *pp, char *parname, double *dbl, int *ndbl) ; 20 | int subst(char *outstr, char *instr, char *ins, char *outs) ; 21 | void dostrsub(phandle *pp) ; 22 | int upstring (char *ss) ; 23 | void subcolon(char *ss) ; 24 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.oldstyle.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 4 | # MUST put pca bin directory in path for smartpca.perl to work 5 | 6 | $command = "pca"; 7 | $command .= " -i example.geno "; 8 | $command .= " -o example.pca "; 9 | $command .= " -e example.eval "; 10 | $command .= " -l example.log "; 11 | $command .= " -k 2 "; 12 | $command .= " -m 5 "; 13 | $command .= " -t 2 "; 14 | $command .= " -s 6.0 "; 15 | print("$command\n"); 16 | system("$command"); 17 | 18 | $command = "eigenstrat"; # or eigenstrat.big.perl for large data sets 19 | $command .= " -i example.geno "; 20 | $command .= " -j example.pheno "; 21 | $command .= " -p example.pca "; 22 | $command .= " -l 1 "; 23 | $command .= " -o example.chisq "; 24 | print("$command\n"); 25 | system("$command"); 26 | 27 | $command = "gc.perl example.chisq example.chisq.GC"; 28 | print("$command\n"); 29 | system("$command"); 30 | -------------------------------------------------------------------------------- /include/regsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | double regressit (double *ans, double *eq, double *rhs, int m, int n); 8 | void regressitall (char **vname, double *eq, double *rhs, int m, int n); 9 | void add1 (int *a, int *b, int n); 10 | 11 | void ptoz (double *p, double *z, int n); 12 | void ztop (double *p, double *z, int n); 13 | double logregressit (double *ans, double *eq, double **rhs, int neq, int nv); 14 | double logrscore (double *eq, double **rhs, int neq, int nv); 15 | 16 | void calcgh (double *grad, double *hess, double *eq, double *z, 17 | double *n0, double *n1, int neq, int nv); 18 | 19 | double zlike (double *eq, double *n0, double *n1, 20 | double *ans, int neq, int nv); 21 | 22 | void squish (double *xmat, double *mat, int nrow, int oldc, int newc); 23 | 24 | void 25 | calcres (double *res, double *ans, double *eq, double *rhs, int neq, int nv); 26 | -------------------------------------------------------------------------------- /include/mcmcpars.h: -------------------------------------------------------------------------------- 1 | 2 | double thp1 = 1.0, thp2 = 5.0; /* params for theta */ 3 | double thxp1 = 1.0, thxp2 = 10.0; /* params for theta X */ 4 | double thxp0 = 40; /* cross term */ 5 | /* -1 is default value (=0 logically) */ 6 | 7 | double lp1 = 10.2, lp2 = 2; 8 | double lxp1 = 10.2, lxp2 = 2; 9 | 10 | double priorlmean = 6; 11 | double priorlmsig = 5; 12 | /* hyperprior on mean, s.dev for gamma prior on lambda */ 13 | 14 | double qtrbase = 0.0; 15 | 16 | double loclip = -20.0; 17 | double hiclip = 15.0; 18 | 19 | double a1 = 2, b1 = 8; 20 | double aa2 = 2, bb2 = 14, cc2 = 85; 21 | double p1 = 18, psi1 = 3; 22 | /* for toys */ 23 | 24 | double muval = 0.0, tmumean = 0.2, muval1; 25 | /* for bridge sampler */ 26 | 27 | int pubxindiv = -1; 28 | int alkesmode = NO; 29 | int malexhet = NO; 30 | int familynames = YES; 31 | 32 | int decim = 0, dmindis = 200000, dmaxdis = 500000; // decimation parameters 33 | int hashcheck = YES; 34 | int outputall = NO; 35 | int sevencolumnped = NO; 36 | 37 | FILE *fstdetails = NULL; 38 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 4 | # MUST put smartpca bin directory in path for smartpca.perl to work 5 | 6 | $command = "smartpca.perl"; 7 | $command .= " -i example.geno "; 8 | $command .= " -a example.snp "; 9 | $command .= " -b example.ind " ; 10 | $command .= " -k 2 "; 11 | $command .= " -o example.pca "; 12 | $command .= " -p example.plot "; 13 | $command .= " -e example.eval "; 14 | $command .= " -l example.log "; 15 | $command .= " -m 5 "; 16 | $command .= " -t 2 "; 17 | $command .= " -s 6.0 "; 18 | print("$command\n"); 19 | system("$command"); 20 | 21 | $command = "smarteigenstrat.perl "; 22 | $command .= " -i example.geno "; 23 | $command .= " -a example.snp "; 24 | $command .= " -b example.ind "; 25 | $command .= " -p example.pca "; 26 | $command .= " -k 1 "; 27 | $command .= " -o example.chisq "; 28 | $command .= " -l example.log "; 29 | print("$command\n"); 30 | system("$command"); 31 | 32 | $command = "gc.perl example.chisq example.chisq.GC"; 33 | print("$command\n"); 34 | system("$command"); 35 | -------------------------------------------------------------------------------- /src/nicksrc/qqq.c: -------------------------------------------------------------------------------- 1 | long expmod(long a, long b, long n) 2 | { 3 | int t ; 4 | long ax=1, bx, z, z2 ; 5 | t = b % 2 ; 6 | if (t==1) ax = a ; 7 | bx = b/2; 8 | if (bx == 0) return ax % n ; 9 | z = expmod(a, bx, n) ; 10 | z2 = (z*z) % n ; 11 | z2 = (ax*z2) % n ; 12 | 13 | return z2 ; 14 | 15 | } 16 | 17 | long 18 | nextprime (long num) 19 | // return nextprime >= num 20 | { 21 | long x, q; 22 | int t; 23 | 24 | for (x = num;; ++x) { 25 | q = expmod(2, x-1, x) ; 26 | if (q != 1 ) continue ; 27 | t = isprime (x); 28 | if (t == YES) 29 | return x; 30 | } 31 | } 32 | 33 | int 34 | isprime (long num) 35 | // naive algorithm. Implement Pollard rho at some time 36 | { 37 | int top, x, t; 38 | 39 | if (num < 2) 40 | return NO; 41 | if (num == 2) 42 | return YES; 43 | top = nnint (sqrt (num)); 44 | 45 | t = num % 2 ; 46 | if (t==0) return NO ; 47 | 48 | for (x = 3; x <= top; x += 2) { 49 | t = num % x; 50 | if (t == 0) 51 | return NO; 52 | } 53 | 54 | return YES; 55 | 56 | } 57 | 58 | 59 | -------------------------------------------------------------------------------- /include/xpsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "admutils.h" 4 | 5 | extern int verbose; 6 | 7 | double xpest (double **gg, int *gobs, int *na, int *nb, 8 | int neq, double *ppa, double *ppb); 9 | 10 | void mk2from3ml (double *xd, double *xc, double p, double pp); 11 | void mk2from2 (double *xd, double *xc); 12 | 13 | void loadpprob (double *pprob, double pa, double pb); 14 | void gen3 (double *ww, double a, double b); 15 | 16 | double xpest2like (double **gg, int *gobs, int *na, int *nb, 17 | int *iscasearr, 18 | int neq, double ppa, double ppb, double risk); 19 | 20 | 21 | // clean up SANS when finalized 22 | 23 | typedef struct 24 | { 25 | SNP *cupt; 26 | int numsamps; 27 | double admbayessc; 28 | double *baymodelsc; 29 | double admfsc; 30 | double admzscore; 31 | double admbsc; 32 | double admfccsc; 33 | double simpsc[2]; 34 | double admyl[3]; 35 | double lrmax; 36 | double lrsig; 37 | double maxlod; 38 | /* now start of fine-mapping scores */ 39 | double gscore; 40 | double gcheck; 41 | double gbayes; 42 | } SANS; 43 | -------------------------------------------------------------------------------- /include/not-thread-h: -------------------------------------------------------------------------------- 1 | #ifndef _THREAD_ 2 | #define _THREAD_ 3 | 4 | #include 5 | 6 | // globals defined in smartpca.c 7 | extern int ldregress; 8 | extern int minallelecnt; 9 | extern int maxmissing; 10 | extern int numthreads; 11 | extern double ldlimit; 12 | 13 | extern pthread_mutex_t mutex_xtx; 14 | extern pthread_mutex_t mutex_nkill; 15 | 16 | 17 | typedef struct thread_args { 18 | 19 | int mythreadnum; 20 | 21 | SNP **xsnplist; 22 | double *xmean; 23 | double *xfancy; 24 | int *xindex; 25 | int *xtypes; 26 | 27 | int numindivs; 28 | int nrows; 29 | int ncols; 30 | int blocksize; 31 | int weightmode; 32 | 33 | int nkill; // out 34 | int nused; // out 35 | 36 | double *XTX; 37 | 38 | } thread_args_t; 39 | 40 | 41 | void *thread_function(void *args); 42 | 43 | thread_args_t *pack_args(int mythreadnum, SNP **xsnplist, double *xmean, double *xfancy, int *xindex, int *xtypes, double *XTX, 44 | int numindivs, int nrows, int ncols, int blocksize, int weightmode); 45 | 46 | void unpack_args(thread_args_t *ta, int *nkill, int *nused); 47 | 48 | #endif 49 | 50 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 4 | # MUST put smartpca bin directory in path for smartpca.perl to work 5 | 6 | $command = "smartpca.perl"; 7 | $command .= " -i example.geno "; 8 | $command .= " -a example.snp "; 9 | $command .= " -b example.QTL.ind " ; 10 | $command .= " -k 2 "; 11 | $command .= " -o example.pca "; 12 | $command .= " -p example.plot "; 13 | $command .= " -e example.eval "; 14 | $command .= " -l example.log "; 15 | $command .= " -m 5 "; 16 | $command .= " -t 2 "; 17 | $command .= " -s 6.0 "; 18 | $command .= " -q YES "; 19 | print("$command\n"); 20 | system("$command"); 21 | 22 | $command = "smarteigenstrat.perl "; 23 | $command .= " -i example.geno "; 24 | $command .= " -a example.snp "; 25 | $command .= " -b example.QTL.ind "; 26 | $command .= " -p example.pca "; 27 | $command .= " -k 1 "; 28 | $command .= " -o example.QTL.chisq "; 29 | $command .= " -l example.log "; 30 | $command .= " -q YES "; 31 | print("$command\n"); 32 | system("$command"); 33 | 34 | $command = "gc.perl example.QTL.chisq example.QTL.chisq.GC"; 35 | print("$command\n"); 36 | system("$command"); 37 | -------------------------------------------------------------------------------- /src/eigensrc/pcatoy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "eigsubs.h" 5 | #include 6 | 7 | int 8 | main () 9 | { 10 | int NSAMPLES, n, k; 11 | double *eval, *evec, *XTX; 12 | 13 | NSAMPLES = 2; 14 | 15 | /* malloc */ 16 | if ((eval = (double *) malloc (NSAMPLES * sizeof (*eval))) == NULL) { 17 | fprintf (stderr, "CM\n"); 18 | exit (1); 19 | } 20 | if ((evec = 21 | (double *) malloc (NSAMPLES * NSAMPLES * sizeof (*evec))) == NULL) { 22 | fprintf (stderr, "CM\n"); 23 | exit (1); 24 | } 25 | if ((XTX = (double *) malloc (NSAMPLES * NSAMPLES * sizeof (*XTX))) == NULL) { 26 | fprintf (stderr, "CM\n"); 27 | exit (1); 28 | } 29 | 30 | XTX[0] = 1; 31 | XTX[1] = 0; 32 | XTX[2] = 0; 33 | XTX[3] = 1; /* 2x2 identity matrix */ 34 | 35 | eigvecs (XTX, eval, evec, NSAMPLES); /* eigenvector k is evec[k*NSAMPLES+n] */ 36 | 37 | /* print eval and evec */ 38 | printf ("The eigenvectors of the 2x2 identity matrix are:\n"); 39 | for (n = 0; n < NSAMPLES; n++) { 40 | for (k = 0; k < NSAMPLES; k++) { 41 | printf (" "); 42 | printf ("%.02f", evec[k * NSAMPLES + n]); 43 | } 44 | printf ("\n"); 45 | } 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /include/kjg_fpca.h: -------------------------------------------------------------------------------- 1 | /** @file kjg_fpca.h 2 | * @brief Runs fastPCA. 3 | * This module also has methods to multiply a genotype matrix against the GSL 4 | * matrices. 5 | */ 6 | 7 | #ifndef KJG_FPCA_H_ 8 | #define KJG_FPCA_H_ 9 | 10 | #include 11 | 12 | extern size_t KJG_FPCA_ROWS; // number of rows to process at once 13 | 14 | /** Performs a fast PCA 15 | * @param *eval eigenvalues 16 | * @param *evec eigenvectors 17 | * @param K number of eigenvalues/vectors 18 | * @param L width of projection matrix 19 | * @param I iterations to do exponentiation 20 | */ 21 | 22 | void kjg_fpca (size_t K, size_t L, size_t I, double *eval, double *evec); 23 | 24 | /** Multiplies B=X*A1 and A2 = XT*B = XT*X*A1 25 | * @param *A1 some matrix 26 | * @param *B intermediate matrix 27 | * @param *A2 next matrix 28 | */ 29 | 30 | void kjg_fpca_XTXA (const gsl_matrix * A1, gsl_matrix * B, gsl_matrix * A2); 31 | 32 | /** Multiplies B = X*A 33 | * @param *A some matrix 34 | * @param *B another matrix 35 | */ 36 | 37 | void kjg_fpca_XA (const gsl_matrix * A, gsl_matrix * B); 38 | 39 | /** Multiplies A = XT*B 40 | * @param *B some matrix 41 | * @param *A another matrix 42 | */ 43 | 44 | void kjg_fpca_XTB (const gsl_matrix * B, gsl_matrix * A); 45 | 46 | #endif /* KJG_FPCA_H_ */ 47 | -------------------------------------------------------------------------------- /src/nicksrc/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009-2016, Broad Institute, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 12 | * 13 | Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 18 | * 19 | Neither the name Broad Institute, Inc. nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE 33 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006-2016, Broad Institute, Inc. and Harvard Medical School 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 12 | * 13 | Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 18 | * 19 | Neither the name Broad Institute, Inc. Harvard University, nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE 33 | -------------------------------------------------------------------------------- /src/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006-2016, Broad Institute, Inc. and Harvard Medical School 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 12 | * 13 | Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 18 | * 19 | Neither the name Broad Institute, Inc. Harvard University, nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE 33 | -------------------------------------------------------------------------------- /bin/evec2pca.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ### translate .evec file to .pca file expected by eigenstrat program 4 | ### Note: .evec file does not contain entries for outliers 5 | ### .pca file does contain entries (set to all 0.0) for outliers 6 | 7 | $k = $ARGV[0]; 8 | $evec = $ARGV[1]; 9 | $ind = $ARGV[2]; 10 | $pca = $ARGV[3]; 11 | 12 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); 13 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); 14 | open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing"); 15 | 16 | print PCA ("$k\n"); # number of output eigenvectors/eigenvalues 17 | $line = ; chomp($line); # eigvals line 18 | my @array = split(/[\t ]+/,$line); 19 | for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval 20 | while($line = ) 21 | { 22 | chomp($line); 23 | $line = " " . $line; 24 | my @array = split(/[\t ]+/,$line); 25 | $l = @array; 26 | unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); } 27 | $sample = $array[1]; 28 | for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; } 29 | $found{$sample} = 1; 30 | } 31 | 32 | while($line = ) 33 | { 34 | chomp($line); 35 | my @array = split(/[\t ]+/,$line); 36 | $sample = $array[0]; 37 | if($sample eq "") { $sample = $array[1]; } 38 | unless($found{$sample}) 39 | { 40 | for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; } 41 | } 42 | for($x=0; $x<$k; $x++) 43 | { 44 | printf PCA (" "); 45 | if($evecarray{$sample}[$x] > 0) { printf PCA (" "); } 46 | printf PCA ("%.04f",$evecarray{$sample}[$x]); 47 | } 48 | printf PCA ("\n"); 49 | } 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /CONVERTF/example.ancestrymapgeno: -------------------------------------------------------------------------------- 1 | rs0000 SAMPLE0 1 2 | rs0000 SAMPLE1 1 3 | rs0000 SAMPLE2 1 4 | rs0000 SAMPLE3 0 5 | rs0000 SAMPLE4 0 6 | rs1111 SAMPLE0 0 7 | rs1111 SAMPLE1 1 8 | rs1111 SAMPLE2 2 9 | rs1111 SAMPLE3 1 10 | rs1111 SAMPLE4 2 11 | rs2222 SAMPLE0 2 12 | rs2222 SAMPLE1 1 13 | rs2222 SAMPLE2 1 14 | rs2222 SAMPLE3 0 15 | rs2222 SAMPLE4 1 16 | rs3333 SAMPLE0 0 17 | rs3333 SAMPLE1 0 18 | rs3333 SAMPLE2 1 19 | rs3333 SAMPLE3 2 20 | rs3333 SAMPLE4 2 21 | rs4444 SAMPLE0 2 22 | rs4444 SAMPLE1 1 23 | rs4444 SAMPLE2 1 24 | rs4444 SAMPLE3 0 25 | rs4444 SAMPLE4 0 26 | rs5555 SAMPLE0 0 27 | rs5555 SAMPLE1 0 28 | rs5555 SAMPLE2 1 29 | rs5555 SAMPLE3 1 30 | rs5555 SAMPLE4 1 31 | rs6666 SAMPLE0 2 32 | rs6666 SAMPLE1 2 33 | rs6666 SAMPLE2 1 34 | rs6666 SAMPLE3 1 35 | rs6666 SAMPLE4 0 36 | -------------------------------------------------------------------------------- /include/linsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void bal(double *a, double *b, int n) ; 7 | 8 | /* linear algebra */ 9 | void mulmat(double *a, double *b, double *c, int a1, int a2, int a3) ; 10 | int solvit (double *prod, double *rhs,int n, double *ans); 11 | int solvitfix (double *prod, double *rhs, int n, double *ans, int *vfix, double *vvals, int nfix) ; 12 | int oldsolvitfix (double *prod, double *rhs, int n, double *ans, int *vfix, double *vvals, int nfix) ; 13 | double pdinv(double *cinv, double *coeff, int n) ; 14 | 15 | /* numer recipes p 97 */ 16 | double logdet(double *mat, int n) ; 17 | int choldc (double *a, int n, double p[]); 18 | void cholsl (double *a, int n, double p[], double b[], double x[]); 19 | void cholesky(double *cf, double *a, int n) ; 20 | void pmat(double *mat, int n) ; 21 | void imulmat(int *a, int *b, int *c, int a1, int a2, int a3) ; 22 | int linsolv(int n, double* pfMatr, double* pfVect, double* sol) ; // Developer: Henry Guennadi Levkin 23 | 24 | double qval(double *vv, double *q, double *l, int n) ; 25 | void qgrad(double *grad, double *vv, double *q, double *l, int n) ; 26 | double mquad(double y0, double y1, double y2, double *pmx) ; 27 | double qminpos(double *vv, double *q, double *l, int n) ; 28 | double qminposfix(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix) ; 29 | double qminposfixc(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix, int *constraint) ; 30 | double qmin(double *vv, double *q, double *l, int n) ; 31 | double qminfix(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix) ;; 32 | double qmpc (double *vnew, double *vold, double *q, double *l, int *dead, int level, int *constraint, int n) ; 33 | double qmp (double *vnew, double *vold, double *q, double *l, int *dead, int level, int n) ; 34 | -------------------------------------------------------------------------------- /POPGEN/example.log: -------------------------------------------------------------------------------- 1 | parameter file: par.example 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | genotypename: ../CONVERTF/example.ped 5 | snpname: ../CONVERTF/example.map 6 | indivname: ../CONVERTF/example.ped 7 | evecoutname: example.evec 8 | evaloutname: example.eval 9 | altnormstyle: NO 10 | numoutevec: 2 11 | familynames: NO 12 | grmoutname: grmjunk 13 | ## smartpca version: 13050 14 | norm used 15 | 16 | genotype file processed 17 | number of samples used: 5 number of snps used: 7 18 | Using 2 threads, and partial sum lookup algorithm. 19 | total number of snps killed in pass: 0 used: 7 20 | grm dumped 21 | 22 | ## To get Tracy-Widom statistics: recompile smartpca with TWTAB correctly specified in Makefile, or 23 | just run twstats (see README file in POPGEN directory) 24 | kurtosis snps indivs 25 | eigenvector 1 1.271 1.505 26 | eigenvector 2 1.796 2.127 27 | population: 0 Case 2 28 | population: 1 Control 3 29 | 30 | ## Average divergence between populations: 31 | Case Control popsize 32 | Case 0.849 3.298 2 33 | Control 3.298 1.151 3 34 | 35 | 36 | number of blocks for moving block jackknife: 1 37 | fst *1000: 38 | C C 39 | C 0 0 40 | C 0 0 41 | 42 | s.dev * 1000000: 43 | C C 44 | C 0 0 45 | C 0 0 46 | 47 | eigenvector 1:means 48 | Control -0.336 49 | Case 0.505 50 | ## Anova statistics for population differences along each eigenvector: 51 | p-value 52 | eigenvector_1_Case_Control_ 0.0262738 53 | eigenvector 2:means 54 | Control -0.056 55 | Case 0.084 56 | eigenvector_2_Case_Control_ 0.805663 57 | 58 | ## Statistical significance of differences beween populations: 59 | pop1 pop2 chisq p-value |pop1| |pop2| 60 | popdifference: Case Control 9.554 0.00842201 2 3 61 | 62 | ##end of smartpca run 63 | -------------------------------------------------------------------------------- /bin/gc.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $P = $ARGV[0]; 4 | $out = $ARGV[1]; 5 | 6 | # get data 7 | $m=0; 8 | open(P,"$P") || die("COF"); 9 | while($line =

) { if($line =~ /Chisq/) { last; } } # header lines 10 | while($line =

) 11 | { 12 | chomp($line); 13 | if($line =~ /NA/) 14 | { 15 | $chisq1[$m] = -100; 16 | $chisq2[$m] = -100; 17 | $m++; 18 | next; 19 | } 20 | my @array = split(/[\t ]+/,$line); 21 | $chisq1[$m] = $array[0]; # Chisq 22 | $chisq2[$m] = $array[1]; # EIGENSTRAT 23 | $m++; 24 | $mvalid++; 25 | } 26 | close(P); 27 | $nSNP = $m; 28 | $nSNPvalid = $mvalid; 29 | 30 | # compute $lambda1 (Chisq) 31 | $CHISQTHRESH = 0.456; 32 | $step = 0.25; 33 | $oktoreducestep = 0; 34 | for($iter=0; $iter<20; $iter++) 35 | { 36 | $mm = 0; 37 | for($m=0; $m<$nSNP; $m++) 38 | { 39 | if($chisq1[$m] > $CHISQTHRESH) { $mm++; } 40 | } 41 | $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH 42 | if($frac > 0.5) { $CHISQTHRESH += $step; } 43 | else { $CHISQTHRESH -= $step; $oktoreducestep = 1; } 44 | if($oktoreducestep) { $step *= 0.5; } 45 | } 46 | $lambda1 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation 47 | if($lambda1 < 1) { $lambda1 = 1; } # not allowed to be less than 1 48 | 49 | # compute $lambda2 (EIGENSTRAT) 50 | $CHISQTHRESH = 0.456; 51 | $step = 0.25; 52 | $oktoreducestep = 1; 53 | for($iter=0; $iter<20; $iter++) 54 | { 55 | $mm = 0; 56 | for($m=0; $m<$nSNP; $m++) 57 | { 58 | if($chisq2[$m] > $CHISQTHRESH) { $mm++; } 59 | } 60 | $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH 61 | if($frac > 0.5) { $CHISQTHRESH += $step; } 62 | else { $CHISQTHRESH -= $step; $oktoreducestep = 1; } 63 | if($oktoreducestep) { $step *= 0.5; } 64 | } 65 | $lambda2 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation 66 | if($lambda2 < 1) { $lambda2 = 1; } # not allowed to be less than 1 67 | 68 | # output 69 | open(OUT,">$out") || die("COF"); 70 | print OUT ("Chisq EIGENSTRAT\n"); 71 | printf OUT ("lambda=%.03f lambda=%.03f\n",$lambda1,$lambda2); 72 | for($m=0; $m<$nSNP; $m++) 73 | { 74 | if($chisq1[$m] < 0) { print OUT ("NA NA\n"); next; } 75 | printf OUT ("%.04f %.04f\n",$chisq1[$m]/$lambda1,$chisq2[$m]/$lambda2); 76 | } 77 | -------------------------------------------------------------------------------- /POPGEN/twexample.eval: -------------------------------------------------------------------------------- 1 | 3.896803 2 | 1.823842 3 | 1.233871 4 | 1.227676 5 | 1.212209 6 | 1.200049 7 | 1.198344 8 | 1.188785 9 | 1.188666 10 | 1.185873 11 | 1.178875 12 | 1.172266 13 | 1.168976 14 | 1.168512 15 | 1.164908 16 | 1.157360 17 | 1.149275 18 | 1.146300 19 | 1.142036 20 | 1.137177 21 | 1.133412 22 | 1.128474 23 | 1.123303 24 | 1.121108 25 | 1.117918 26 | 1.112247 27 | 1.111754 28 | 1.108400 29 | 1.101575 30 | 1.098672 31 | 1.095195 32 | 1.092344 33 | 1.087750 34 | 1.085886 35 | 1.083626 36 | 1.081931 37 | 1.079625 38 | 1.078805 39 | 1.071742 40 | 1.068232 41 | 1.067591 42 | 1.063070 43 | 1.060513 44 | 1.058145 45 | 1.056450 46 | 1.054916 47 | 1.050893 48 | 1.048894 49 | 1.043173 50 | 1.042661 51 | 1.040887 52 | 1.036872 53 | 1.034616 54 | 1.030792 55 | 1.028936 56 | 1.027220 57 | 1.025316 58 | 1.022074 59 | 1.016391 60 | 1.015529 61 | 1.013203 62 | 1.008569 63 | 1.007275 64 | 1.003458 65 | 1.001885 66 | 1.001046 67 | 0.996586 68 | 0.995739 69 | 0.992099 70 | 0.988173 71 | 0.985532 72 | 0.982139 73 | 0.979806 74 | 0.976317 75 | 0.974724 76 | 0.971432 77 | 0.970185 78 | 0.966816 79 | 0.965728 80 | 0.962983 81 | 0.958791 82 | 0.956462 83 | 0.954499 84 | 0.952054 85 | 0.949986 86 | 0.945780 87 | 0.944417 88 | 0.939229 89 | 0.937765 90 | 0.933657 91 | 0.931285 92 | 0.928772 93 | 0.927551 94 | 0.922779 95 | 0.919530 96 | 0.917899 97 | 0.914707 98 | 0.909525 99 | 0.907726 100 | 0.902872 101 | 0.900004 102 | 0.899288 103 | 0.898200 104 | 0.896129 105 | 0.892450 106 | 0.889483 107 | 0.886985 108 | 0.880826 109 | 0.880009 110 | 0.877975 111 | 0.875729 112 | 0.870950 113 | 0.867717 114 | 0.866428 115 | 0.864954 116 | 0.861732 117 | 0.859268 118 | 0.858495 119 | 0.854881 120 | 0.851066 121 | 0.846823 122 | 0.842529 123 | 0.841246 124 | 0.838740 125 | 0.838459 126 | 0.834131 127 | 0.832612 128 | 0.827219 129 | 0.823981 130 | 0.821442 131 | 0.818558 132 | 0.815683 133 | 0.814061 134 | 0.810291 135 | 0.808223 136 | 0.806080 137 | 0.801127 138 | 0.795730 139 | 0.793393 140 | 0.790344 141 | 0.787822 142 | 0.784379 143 | 0.777760 144 | 0.776245 145 | 0.774965 146 | 0.764580 147 | 0.758267 148 | 0.747945 149 | 0.747072 150 | -------------------------------------------------------------------------------- /src/eigensrc/smartsubs.c: -------------------------------------------------------------------------------- 1 | #include "qpsubs.h" 2 | #include "eigsubs.h" 3 | #include "smartsubs.h" 4 | extern int fancynorm, verbose, plotmode, outnum; 5 | 6 | // static Indiv **indm ; 7 | // static void wjackestx(double *est, double *sig, double mean, double *jmean, double *jwt, int g) ; 8 | // static void wjackvestx(double *vest, double *var, int d, double *mean, double **jmean, double *jwt, int g) ; 9 | static int outliermode = 0; 10 | 11 | void 12 | setoutliermode (int mode) 13 | { 14 | outliermode = mode; 15 | } 16 | 17 | int 18 | ridoutlier (double *evecs, int n, int neigs, 19 | double thresh, int *badlist, OUTLINFO ** outinfo) 20 | { 21 | /* badlist contains list of outliers */ 22 | double *ww, *w2, y1, y2, yy, zz; 23 | int *vbad; 24 | int i, j; 25 | int nbad = 0; 26 | OUTLINFO *outpt; 27 | 28 | if (outliermode > 1) 29 | return 0; 30 | if (n < 3) 31 | return 0; 32 | ZALLOC (ww, n, double); 33 | ZALLOC (vbad, n, int); 34 | for (j = 0; j < n; j++) { 35 | outpt = outinfo[j]; 36 | outpt->vecno = -1; 37 | } 38 | for (i = 0; i < neigs; ++i) { 39 | copyarr (evecs + i * n, ww, n); 40 | if (outliermode == 0) { 41 | y1 = asum (ww, n) / (double) n; 42 | vsp (ww, ww, -y1, n); 43 | y2 = asum2 (ww, n) / (double) n; 44 | y2 = sqrt (y2); 45 | vst (ww, ww, 1.0 / y2, n); 46 | 47 | for (j = 0; j < n; j++) { 48 | if (fabs (ww[j]) > thresh) { 49 | vbad[j] = 1; 50 | outpt = outinfo[j]; 51 | if (outpt->vecno < 0) { 52 | outpt->vecno = i; 53 | outpt->score = ww[j]; 54 | } 55 | } 56 | } 57 | } 58 | if (outliermode == 1) { 59 | ZALLOC (w2, n, double); 60 | for (j = 0; j < n; j++) { 61 | yy = ww[j]; 62 | ww[j] = 0; 63 | y1 = asum (ww, n) / (double) (n - 1); 64 | vsp (w2, ww, -y1, n); 65 | w2[j] = 0; 66 | y2 = asum2 (w2, n) / (double) n; 67 | y2 = sqrt (y2); 68 | zz = yy - y1; 69 | zz /= y2; 70 | if (fabs (zz) > thresh) { 71 | vbad[j] = 1; 72 | outpt = outinfo[j]; 73 | if (outpt->vecno < 0) { 74 | outpt->vecno = i; 75 | outpt->score = zz; 76 | } 77 | } 78 | ww[j] = yy; 79 | } 80 | free (w2); 81 | } 82 | } 83 | for (j = 0; j < n; j++) { 84 | if (vbad[j] == 1) { 85 | badlist[nbad] = j; 86 | ++nbad; 87 | } 88 | } 89 | free (ww); 90 | free (vbad); 91 | return nbad; 92 | 93 | } 94 | -------------------------------------------------------------------------------- /bin/smarteigenstrat.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # perl wrapper for smarteigenstrat program. Run smarteigenstrat.perl with no options for usage 4 | 5 | use Getopt::Std; 6 | 7 | my @flaglist = ("i","a","b","o","p","q","l","k"); 8 | $x = @ARGV; 9 | for($n=0;$n<$x;$n++) { 10 | foreach $flag (@flaglist) { 11 | if ($ARGV[$n] eq "-$flag") { 12 | $specified{$flag} = 1; 13 | } 14 | } 15 | } 16 | 17 | # check for mandatory options 18 | foreach $flag ("i","a","b","p","o","l") { 19 | unless ($specified{$flag}) { 20 | usage(); 21 | die("Error: -$flag not specified"); 22 | } 23 | } 24 | 25 | # get opts from hash 26 | getopts('i:a:b:p:o:q:k:l:', \%opts); 27 | $genofilename = $opts{"i"}; 28 | $indfilename = $opts{"b"}; 29 | $snpfilename = $opts{"a"}; 30 | $pcafilename = $opts{"p"}; 31 | $outfilename = $opts{"o"}; 32 | $logfilename = $opts{"l"}; 33 | $qtmode = "NO"; 34 | if ( $specified{"q"} ) { 35 | $qtmode = $opts{"q"}; 36 | } 37 | $k = 1; 38 | if ( $specified{"k"} ) { 39 | $k = $opts{"k"}; 40 | } 41 | 42 | # write parameter file 43 | open(PAR, ">$outfilename.par") || die("Error: unable to open $outfilename.par\n"); 44 | print PAR "genotypename: $genofilename\n"; 45 | print PAR "snpname: $snpfilename\n"; 46 | print PAR "indivname: $indfilename\n"; 47 | print PAR "pcaname: $pcafilename\n"; 48 | print PAR "outputname: $outfilename\n"; 49 | print PAR "numpc: $k\n"; 50 | print PAR "qtmode: $qtmode\n"; 51 | close(PAR); 52 | 53 | # run smarteigenstrat 54 | $cmd = "smarteigenstrat -p $outfilename.par >$logfilename"; 55 | print "$cmd\n"; 56 | system($cmd); 57 | 58 | sub usage { 59 | print "smarteigenstrat.perl -i -a -b -p -o "; 60 | print " -l -k -q "; 61 | print "\n"; 62 | print "-i genotype file (PED, PACKEDPED, EIGENSTRAT, ANCESTRYMAP or PACKEDANCESTRYMAP format)"; 63 | print "-o output file (chisq)\n"; 64 | print "-l logfile (screen output,including error messages)\n"; 65 | print "-q YES for quantitative phenotype or NO otherwise\n"; 66 | print "\n"; 67 | print "For quantitative phenotype, sixth column of .ped file or third column of EIGENSTRAT .ind file\n"; 68 | print "should be real numbers. For non-quantitative phenotype, sixth column of .ped or third column\n"; 69 | print "should be 'Case' or 'Control'\n"; 70 | } 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /src/h2d.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | extern int verbose; 10 | 11 | int 12 | mkindh2d (Indiv ** indivmarkers, Indiv *** pindm2, int numindivs) 13 | { 14 | char ss[50]; 15 | Indiv *indx, **indm2, *indp; 16 | int n, len, k; 17 | int numind2; 18 | 19 | numind2 = numindivs / 2; 20 | ZALLOC (*pindm2, numind2, Indiv *); 21 | indm2 = *pindm2; 22 | n = 0; 23 | for (k = 0; k < numindivs; k++) { 24 | indx = indivmarkers[k]; 25 | strcpy (ss, indx->ID); 26 | len = strlen (ss); 27 | if (ss[len - 1] != 'A') 28 | continue; 29 | ss[len - 2] = CNULL; 30 | ZALLOC (indm2[n], 1, Indiv); 31 | indp = indm2[n]; 32 | *indp = *indx; 33 | strcpy (indp->ID, ss); 34 | ++n; 35 | } 36 | if (n != numind2) 37 | fatalx ("(mkindh2d) bug\n"); 38 | return n; 39 | } 40 | 41 | void 42 | remaph2d (SNP ** snpmarkers, int numsnps, Indiv ** indivmarkers, 43 | Indiv ** indm2, int numindivs, int numind2) 44 | { 45 | 46 | int *g1, *g2; 47 | int *x1, *x2; 48 | int *tind, tt, t, i, j, k, j1, j2; 49 | Indiv *indx; 50 | SNP *cupt; 51 | char s1[50], s2[50]; 52 | 53 | ZALLOC (g2, numind2, int); 54 | ZALLOC (g1, numindivs, int); 55 | ZALLOC (x1, numindivs, int); 56 | ZALLOC (x2, numindivs, int); 57 | 58 | for (k = 0; k < numind2; ++k) { 59 | indx = indm2[k]; 60 | sprintf (s1, "%s:A", indx->ID); 61 | sprintf (s2, "%s:B", indx->ID); 62 | t = x1[k] = indindex (indivmarkers, numindivs, s1); 63 | if (t < 0) { 64 | sprintf (s1, "%s_A", indx->ID); 65 | sprintf (s2, "%s_B", indx->ID); 66 | t = x1[k] = indindex (indivmarkers, numindivs, s1); 67 | } 68 | if (t < 0) 69 | fatalx ("bad newindiv: %s\n", indx->ID); 70 | t = x2[k] = indindex (indivmarkers, numindivs, s2); 71 | if (t < 0) 72 | fatalx ("bad newindiv: %s\n", indx->ID); 73 | } 74 | 75 | for (i = 0; i < numsnps; i++) { 76 | cupt = snpmarkers[i]; 77 | 78 | for (j = 0; j < numind2; ++j) { 79 | t = x1[j]; 80 | g1[j] = getgtypes (cupt, t); 81 | t = x2[j]; 82 | g2[j] = getgtypes (cupt, t); 83 | tt = -1; 84 | if ((g1[j] >= 0) && (g2[j] >= 0)) 85 | tt = g1[j] + g2[j]; 86 | putgtypes (cupt, j, tt); 87 | } 88 | } 89 | 90 | free (g1); 91 | free (g2); 92 | free (x1); 93 | free (x2); 94 | 95 | } 96 | -------------------------------------------------------------------------------- /include/ranmath.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | #include 6 | 7 | #define BIGINT INT_MAX 8 | #define SRAND srandom 9 | #define LRAND random 10 | #define DRAND() ( (double) (random() % BIGINT) / (double) (BIGINT)) 11 | #define DRAND2() ( drand2() ) 12 | /* random must return random integer in range 0 to BIGINT-1 */ 13 | 14 | 15 | #define NORMAL gauss 16 | 17 | double gauss() ; // standard normal 18 | void gaussa(double *a, int n) ; // array of standard normals 19 | double gds(double a) ; // obsolete 20 | double poidev(double mean) ; // obsolete 21 | double ranpoiss(double mean) ; // poisson Note double 22 | double ranpoissx(double mean) ; // poisson | > 0 23 | void ranperm(int *a, int n) ; // randomly permute a; if random permulation wanted : idperm(a,n) ; ranperm(a,n) 24 | double ranexp( void) ; // exponential mean 1 25 | double rangam(double a) ; // standard gamma mean a 26 | int randis(double *a, int n) ; // element from discrete distribution a 27 | void ransamp(int *samp, int nsamp, double *p, int plen) ; // sample nsamp samples from p 28 | void pick2(int n, int *k1, int *k2) ; // pick 2 elements from 0..n-1 29 | int ranmod(int n) ; // random mod n 30 | double ranbeta(double a, double b) ; // beta 31 | int ranbinom(int n, double p) ; // binomial 32 | void setrand(double *vv, int n) ; // filll vv with U[0,1] 33 | int ewens(int *a, int n, double theta) ; // ewens sampling formula 34 | void genmultgauss(double *rvec, int num, int n, double *covar) ; // multivariate 35 | double drand2() ; 36 | void ranmultinom(int *samp, int n, double *p, int len) ; // multinomial 37 | double ranchi (int d) ; // chisq d dof. 38 | void raninvwis(double *wis, int t, int d, double *s) ; // inverse wishart 39 | double uniform(double lo, double hi) ; // uniform (lo..hi) 40 | void ransimplex(double *x, int n) ; // uniform on n-simplex 41 | void randirichlet(double *x, double *pp, int n) ; // dirichlet parameter vector pp 42 | void randirmult(double *pp, int *aa, int len, int m) ; // dirichlet multinomial. Output aa 43 | int prob1(double p) ; 44 | double rant(double df) ; // t distribution 45 | double samppow(double e, double a, double b) ; 46 | double rejnorm(double lo, double hi) ; // usually call ranboundnorm 47 | double ranboundnorm(double lo, double hi) ; // sample standard normal in [lo, hi] 48 | double rantruncnorm(double T, int upper) ; // sample standard normal > T (upper =1) < T (upper = 0) 49 | int ranhprob(int n, int a, int m) ; 50 | -------------------------------------------------------------------------------- /src/eigensrc/exclude.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define MAXRGN 1000 6 | 7 | void 8 | excluderegions (char *xregionname, SNP ** snps, int nsnps, 9 | char *deletesnpoutname) 10 | { 11 | FILE *fp; 12 | 13 | int chr[MAXRGN]; 14 | int lo[MAXRGN]; 15 | int hi[MAXRGN]; 16 | 17 | char line[MAXSTR]; 18 | char *spt[MAXFF]; 19 | int nsplit, nrgn, i, j; 20 | 21 | if ((fp = fopen (xregionname, "r")) == NULL) { 22 | printf ("excluderegions: can't open file %s\n", xregionname); 23 | return; 24 | } 25 | 26 | for (i = 0; i < MAXRGN; i++) { 27 | 28 | if (fgets (line, MAXSTR, fp) == NULL) 29 | break; 30 | 31 | nsplit = splitup (line, spt, MAXFF); 32 | if (nsplit != 3) 33 | continue; 34 | 35 | chr[i] = atoi (spt[0]); 36 | lo[i] = atoi (spt[1]); 37 | hi[i] = atoi (spt[2]); 38 | 39 | } 40 | fclose (fp); 41 | nrgn = i; 42 | 43 | for (i = 0; i < nsnps; i++) { 44 | SNP *cupt = snps[i]; 45 | for (j = 0; j < nrgn; j++) { 46 | if (cupt->chrom == chr[j] && cupt->physpos >= lo[j] 47 | && cupt->physpos <= hi[j]) { 48 | cupt->ignore = YES; 49 | if (deletesnpoutname != NULL) { 50 | logdeletedsnp (cupt->ID, "xregion", deletesnpoutname); 51 | } 52 | } 53 | } 54 | } 55 | 56 | return; 57 | 58 | } 59 | 60 | void 61 | hwfilter (SNP ** snps, int nsnps, int nindiv, double nhwfilter, 62 | char *deletesnpoutname) 63 | { 64 | 65 | int i, k; 66 | 67 | for (i = 0; i < nsnps; i++) { 68 | int num = 0, den = 0, het = 0, n0 = 0, n1 = 0, n2 = 0, nsamples; 69 | double p, Q, stdv; 70 | SNP *cupt = snps[i]; 71 | 72 | for (k = 0; k < nindiv; k++) { 73 | int g = getgtypes (cupt, k); 74 | if (g >= 0) { 75 | num += g; 76 | den += 2; 77 | } 78 | if (g == 1) { 79 | het++; 80 | n1++; 81 | } 82 | else if (g == 0) { 83 | n0++; 84 | } 85 | else if (g == 2) { 86 | n2++; 87 | } 88 | } 89 | 90 | if ((nsamples = den / 2) == 0) 91 | continue; 92 | p = (double) num / den; 93 | Q = 2 * p * (1 - p); 94 | stdv = sqrt (Q * (1 - Q) / nsamples); 95 | if (fabs ((double) het / nsamples - Q) > nhwfilter * stdv) { 96 | printf ("SNP %s removed by Hardy-Weinberg filter\n", cupt->ID); 97 | cupt->ignore = YES; 98 | if (deletesnpoutname != NULL) { 99 | logdeletedsnp (cupt->ID, "hwfilt", deletesnpoutname); 100 | } 101 | } 102 | } 103 | 104 | 105 | } 106 | -------------------------------------------------------------------------------- /bin/evec2pca-ped.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ### translate .evec file to .pca file expected by eigenstrat program 4 | ### Note: .evec file does not contain entries for outliers 5 | ### .pca file does contain entries (set to all 0.0) for outliers 6 | 7 | # ----- This is a new version for PLINK input files. It differs from the 8 | # ----- original in two ways. (1) The indiv name is in the second column 9 | # ----- of the .fam file, but the first of a .ind file. (2) If the 10 | # ----- indiv names are not found in the .evec file, try the 11 | # ----- familyname:indivname combination. 12 | 13 | $k = $ARGV[0]; 14 | $evec = $ARGV[1]; 15 | $ind = $ARGV[2]; 16 | $pca = $ARGV[3]; 17 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); 18 | open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing"); 19 | 20 | print PCA ("$k\n"); # number of output eigenvectors/eigenvalues 21 | $line = ; chomp($line); # eigvals line 22 | my @array = split(/[\t ]+/,$line); 23 | for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval 24 | while($line = ) 25 | { 26 | chomp($line); 27 | $line = " " . $line; 28 | my @array = split(/[\t ]+/,$line); 29 | $l = @array; 30 | unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); } 31 | $sample = $array[1]; 32 | for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; } 33 | $found{$sample} = 1; 34 | } 35 | 36 | # ----- Figure out which name convention to use 37 | my $count1 = 0; 38 | my $count2 = 0; 39 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); 40 | while ( my $line = ) { 41 | chomp($line); 42 | $line =~ s/^[\s]+//; # remove leading white-space 43 | my @E = split(/[\s]+/,$line); 44 | 45 | my $s = $E[1]; 46 | my $t = $E[0] . ":" . $E[1]; 47 | 48 | if ( exists $found{$s} ) { 49 | $count1++; 50 | } 51 | if ( exists $found{$t} ) { 52 | $count2++; 53 | } 54 | } 55 | close(IND); 56 | 57 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); 58 | while($line = ) 59 | { 60 | chomp($line); 61 | $line =~ s/^[\s]+//; 62 | my @array = split(/[\s]+/,$line); 63 | $sample = ($count1 >= $count2 ? $array[1] : $array[0] . ":" . $array[1]); 64 | if($sample eq "") { $sample = $array[1]; } 65 | unless($found{$sample}) 66 | { 67 | for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; } 68 | } 69 | for($x=0; $x<$k; $x++) 70 | { 71 | printf PCA (" "); 72 | if($evecarray{$sample}[$x] > 0) { printf PCA (" "); } 73 | printf PCA ("%.04f",$evecarray{$sample}[$x]); 74 | } 75 | printf PCA ("\n"); 76 | } 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | override CFLAGS += -I../include -I/usr/include/openblas 2 | #LDLIBS += -lgsl -lopenblas -lrt -lm 3 | override LDLIBS += -lgsl -lopenblas -lm -lpthread 4 | # Some Linux distributions require separate lapacke library 5 | # override LDLIBS += -llapacke 6 | # Mac additions using homebrew installations 7 | #override CFLAGS += -I/usr/local/opt/openblas/include -I/usr/local/opt/gsl/include 8 | #override LDFLAGS += -L/usr/local/opt/openblas/lib -L/usr/local/opt/gsl/lib 9 | # Harvard Medical School O2 cluster additions 10 | ifdef SLURM_CONF 11 | override CFLAGS += -I/n/app/openblas/0.2.19/include -I/n/app/gsl/2.3/include 12 | override LDFLAGS += -L/n/app/openblas/0.2.19/lib -L/n/app/gsl/2.3/lib/ 13 | endif 14 | 15 | ifeq ($(OPTIMIZE), 1) 16 | CFLAGS += -O2 17 | endif 18 | 19 | ifeq ($(DEBUG), 1) 20 | CFLAGS += -g # enable debugging 21 | endif 22 | 23 | ifeq ($(PROFILING), 1) 24 | CFLAGS += -pg # enable profiling 25 | endif 26 | 27 | ND=nicksrc 28 | ED=eigensrc 29 | KD=ksrc 30 | 31 | NLIB = $(ND)/libnick.a 32 | 33 | # ----- phony targets 34 | .PHONY: all clean clobber install 35 | 36 | EXE = baseprog convertf mergeit pca smshrink \ 37 | $(ED)/pcatoy $(ED)/smartrel $(ED)/smarteigenstrat \ 38 | $(ED)/twstats $(ED)/eigenstrat $(ED)/eigenstratQTL $(ED)/smartpca 39 | 40 | all: $(EXE) 41 | 42 | install: all 43 | mv $(EXE) ../bin 44 | 45 | clobber: 46 | rm -f *.o */*.o */*.a $(ND)/*.o $(ED)/*.o $(KD)/*.o 47 | rm -f $(EXE) 48 | cd ../bin/ ; rm -f $(notdir $(EXE)) ; cd ../src 49 | 50 | clean: 51 | rm -f *.o core core.* *.o $(ND)/*.o $(ED)/*.o $(KD)/*.o $(EXE) 52 | 53 | # ----- build nicksrc/libnick.a 54 | $(NLIB): 55 | $(MAKE) -C $(ND) 56 | 57 | baseprog: baseprog.o mcio.o egsubs.o admutils.o h2d.o $(ED)/exclude.o $(NLIB) 58 | 59 | convertf: convertf.o mcio.o egsubs.o admutils.o h2d.o $(ED)/exclude.o $(NLIB) 60 | 61 | mergeit: mergeit.o mcio.o admutils.o $(NLIB) 62 | 63 | pca: pca.o $(ED)/eigsubs.o eigensrc/eigx.o $(NLIB) 64 | 65 | $(ED)/pcatoy: $(ED)/pcatoy.o eigensrc/eigsubs.o eigensrc/eigx.o $(NLIB) 66 | 67 | $(ED)/smartrel: $(ED)/smartrel.o twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o \ 68 | $(ED)/eigsubs.o $(ED)/eigx.o $(ED)/smartsubs.o $(NLIB) 69 | 70 | $(ED)/smarteigenstrat: $(ED)/smarteigenstrat.o mcio.o admutils.o $(NLIB) 71 | 72 | $(ED)/twstats: $(ED)/twstats.o $(NLIB) 73 | 74 | $(ED)/smartpca: $(ED)/smartpca.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \ 75 | mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \ 76 | $(NLIB) \ 77 | $(KD)/kjg_fpca.o $(KD)/kjg_gsl.o 78 | #-lpthread 79 | 80 | smshrink: smshrink.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \ 81 | twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \ 82 | $(NLIB) \ 83 | $(KD)/kjg_fpca.o $(KD)/kjg_gsl.o 84 | 85 | -------------------------------------------------------------------------------- /POPGEN/smartpca.info: -------------------------------------------------------------------------------- 1 | This file contains documentation of standard output printed by smartpca. For 2 | general documentation of the smartpca program, see README in this directory. 3 | 4 | Parameter values: smartpca prints basic info on parameter values used. 5 | 6 | Outliers: smartpca prints a list of outlier individuals removed, if any. 7 | 8 | Tracy-Widom statistics: the column of interest is the "p-value" column which 9 | indicates the statistical significance of each principal component. 10 | To get Tracy-Widom statistics, you must recompile smartpca in your 11 | local src/ directory (and move it to bin/), 12 | or just run twstats (see README file in POPGEN directory). 13 | 14 | eigbestsnp: the SNP of maximum weight. SNP weights are proportional to 15 | the correlation (across samples) between each SNP and each PC. 16 | Equivalently, PC coordinates of a given sample can be computed as the 17 | weighted sum of normalized SNP genotypes. 18 | 19 | ---------------------------------------------------------------------------- 20 | 21 | POPULATION GENETIC STATISTICS (relevant to studying relationships between 22 | populations whose labels are explicitly specified in input indiv file) -- 23 | 24 | Average divergence between populations: smartpca prints a divergence matrix 25 | describing divergence between each pair of populations. Details: 26 | From the covariance matrix X whose eigenvectors were computed 27 | we can compute a "distance" d for each pair of individuals (i,j): 28 | d(i,j) = X(i,i) + X(j,j) - 2X(i,j) 29 | For each pair of populations (a,b) 30 | now define 31 | D(a,b) = \sum d(i,j) (in pop a, in pop b) / (| popa | * | pop b| ) 32 | an average distance. We then normalize D so that the diagonal has 33 | mean 1 and publish D. 34 | 35 | Fst statistics: prints fst estimate between each pair of populations, 36 | along with standard error of the estimate. 37 | [If there is only 1 population, no fst statistics are printed.] 38 | [If phylipoutname parameter is specified, this information is instead 39 | printed to an output file in PHYLIP format. See ./README for details.] 40 | 41 | Anova statistics for population differences along each eigenvector: 42 | For each eigenvector, a P-value for statistical significance of differences 43 | between each pair of populations along that eigenvector is printed. 44 | +++ is used to highlight P-values less than 1e-06. 45 | *** is used to highlight P-values between 1e-06 and 1e-03. 46 | If there are more than 2 populations, an overall P-value is also printed 47 | for that eigenvector. 48 | If there are more than 2 populations, the populations with minimum (minv) 49 | and maximum (maxv) eigenvector coordinate are also printed. 50 | [If there is only 1 population, no Anova statistics are printed.] 51 | 52 | Statistical significance of differences between populations: 53 | For each pair of populations, the above Anova statistics are summed 54 | across eigenvectors. The result is approximately chisq with 55 | d.o.f. equal to the number of eigenvectors. The chisq statistic and 56 | its p-value are printed. 57 | [If there is only 1 population, no statistics are printed.] 58 | 59 | ----------------------------------------------------------------------- 60 | 61 | Questions? nickp@broadinstitute.org 62 | -------------------------------------------------------------------------------- /src/eigensrc/twstats.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | int verbose = NO; 9 | double nval = -1; 10 | 11 | char *iname = NULL; 12 | char *parname = NULL; 13 | char *oname = NULL; 14 | 15 | char *twxtab = NULL; 16 | 17 | void readcommands (int argc, char **argv); 18 | 19 | #define VERSION "1000" 20 | 21 | int minleneig = 10; 22 | 23 | 24 | int 25 | main (int argc, char **argv) 26 | { 27 | FILE *ofile; 28 | int nlambda = 0; 29 | int i, m; 30 | double zn, zvar, tw, tail; 31 | double *xx[0], *lambda; 32 | 33 | readcommands (argc, argv); 34 | settwxtable (twxtab); 35 | 36 | if (oname == NULL) 37 | ofile = stdout; 38 | else 39 | openit (oname, &ofile, "w"); 40 | 41 | if (iname == NULL) 42 | fatalx ("i paraameter compulsory\n"); 43 | nlambda = numlines (iname); 44 | ZALLOC (lambda, nlambda, double); 45 | xx[0] = lambda; 46 | nlambda = getxx (xx, nlambda, 1, iname); 47 | vst (lambda, lambda, -1.0, nlambda); 48 | sortit (lambda, NULL, nlambda); 49 | vst (lambda, lambda, -1.0, nlambda); 50 | m = numgtz (lambda, nlambda); 51 | 52 | fprintf (ofile, "%4s %12s", "#N", "eigenvalue"); 53 | fprintf (ofile, "%12s", "difference"); 54 | fprintf (ofile, " %9s %12s", "twstat", "p-value"); 55 | fprintf (ofile, " %9s", "effect. n"); 56 | fprintf (ofile, "\n"); 57 | 58 | for (i = 0; i < m; ++i) { 59 | 60 | zn = nval; 61 | tail = dotwcalc (lambda + i, m - i, &tw, &zn, &zvar, minleneig); 62 | fprintf (ofile, "%4d %12.6f", i + 1, lambda[i]); 63 | if (i == 0) 64 | fprintf (ofile, "%12s", "NA"); 65 | else 66 | fprintf (ofile, "%12.6f", lambda[i] - lambda[i - 1]); 67 | if (tail >= 0.0) 68 | fprintf (ofile, " %9.3f %12.6g", tw, tail); 69 | else 70 | fprintf (ofile, " %9s %12s", "NA", "NA"); 71 | if (zn > 0.0) { 72 | fprintf (ofile, " %9.3f", zn); 73 | } 74 | else { 75 | fprintf (ofile, " %9s", "NA"); 76 | } 77 | fprintf (ofile, "\n"); 78 | } 79 | return 0; 80 | } 81 | 82 | void 83 | readcommands (int argc, char **argv) 84 | { 85 | 86 | int i; 87 | char *parname = NULL; 88 | phandle *ph; 89 | 90 | while ((i = getopt (argc, argv, "i:o:p:n:m:t:V")) != -1) { 91 | 92 | switch (i) { 93 | 94 | case 'i': 95 | iname = strdup (optarg); 96 | break; 97 | 98 | case 'o': 99 | oname = strdup (optarg); 100 | break; 101 | 102 | case 't': 103 | twxtab = strdup (optarg); 104 | break; 105 | 106 | case 'n': 107 | nval = atof (optarg); 108 | break; 109 | 110 | case 'm': 111 | minleneig = atoi (optarg); 112 | break; 113 | 114 | case 'p': 115 | parname = strdup (optarg); 116 | break; 117 | 118 | case 'V': 119 | verbose = YES; 120 | break; 121 | 122 | case '?': 123 | printf ("Usage: bad params.... \n"); 124 | fatalx ("bad params\n"); 125 | } 126 | } 127 | 128 | if (parname == NULL) 129 | return; 130 | 131 | printf ("parameter file: %s\n", parname); 132 | ph = openpars (parname); 133 | 134 | getstring (ph, "input:", &iname); 135 | getstring (ph, "output:", &oname); 136 | getdbl (ph, "nval:", &nval); 137 | getint (ph, "minleneig:", &minleneig); 138 | 139 | writepars (ph); 140 | closepars (ph); 141 | 142 | } 143 | -------------------------------------------------------------------------------- /include/kjg_gsl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file kjg_gsl.h 3 | * @brief Augment GSL functions 4 | */ 5 | 6 | #ifndef KJG_GSL_H_ 7 | #define KJG_GSL_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | /** 14 | * Prints the matrix tab-delimited 15 | * @param *stream output file pointer 16 | * @param *m gsl_matrix to print 17 | * @param *template character template for fprintf 18 | */ 19 | 20 | void kjg_gsl_matrix_fprintf (FILE * stream, gsl_matrix * m, 21 | const char *template); 22 | 23 | /** 24 | * Prints the eigenvalues and then eigenvectors below 25 | * @param *stream output file pointer 26 | * @param *eval eigenvalues 27 | * @param *evec eigenvectors 28 | * @param *template character template for fprintf */ 29 | 30 | void kjg_gsl_evec_fprintf (FILE * stream, 31 | gsl_vector * eval, 32 | gsl_matrix * evec, const char *template); 33 | 34 | /** 35 | * Reads a matrix 36 | * @param *stream input file pointer 37 | * @param *m matrix to store 38 | */ 39 | 40 | void kjg_gsl_matrix_fscanf (FILE * stream, gsl_matrix * m); 41 | 42 | /** 43 | * Reads an evec 44 | * @param *stream input file pointer 45 | * @param *eval eigenvalues vector 46 | * @param *evec eigenvectors matrix 47 | */ 48 | 49 | int kjg_gsl_evec_fscanf (FILE * stream, gsl_vector * eval, gsl_matrix * evec); 50 | 51 | /** 52 | * Initializes random number generation. 53 | */ 54 | 55 | gsl_rng *kjg_gsl_rng_init (); 56 | 57 | /** 58 | * Initializes the matrix with random unit gaussians 59 | * @param *m matrix to be set 60 | * @param *r random number generator 61 | */ 62 | 63 | void kjg_gsl_ran_ugaussian_pair (const gsl_rng * r, double x[2]); 64 | 65 | /** Fills a matrix with unit Gaussian random variates 66 | * @param *r random number generator 67 | * @param *m matrix to be filled 68 | */ 69 | 70 | void kjg_gsl_ran_ugaussian_matrix (const gsl_rng * r, gsl_matrix * m); 71 | 72 | /** 73 | * Normalizes the matrix so the Frobenius norm is M*N 74 | * @param *m matrix to normalize 75 | * @return if error 76 | */ 77 | 78 | int kjg_gsl_matrix_frobenius_normalize (gsl_matrix * m); 79 | 80 | /** 81 | * Calculates the norm of a matrix 82 | * @param norm type of norm to return, see lapack dlange 83 | * @param *m matrix to find norm of 84 | * @return norm 85 | */ 86 | 87 | double kjg_gsl_dlange (const char norm, const gsl_matrix * m); 88 | 89 | /** 90 | * Performs the QR decomposition on the matrix and return Q in the matrix 91 | * @param *m matrix to orthogonalize 92 | */ 93 | 94 | void kjg_gsl_matrix_QR (gsl_matrix * m); 95 | 96 | /** 97 | * Calls LAPACK dgeqrf and return R and compacted Q matrix 98 | * @param *m input matrix 99 | * @param *tau see LAPACK documentation 100 | * @return LAPACK return 101 | */ 102 | 103 | int kjg_gsl_dgeqrf (gsl_matrix * m, gsl_vector * tau); 104 | 105 | /** 106 | * Calls LAPACK dorgqr to extract Q matrix 107 | * @param *m matrix with compacted Q and will store unpacked Q 108 | * @param *tau see LAPACK documentation 109 | * @return LAPACK return 110 | */ 111 | int kjg_gsl_dorgqr (gsl_matrix * m, gsl_vector * tau); 112 | 113 | /** 114 | * Calls LAPACK dgesvd, keeping u (in m) and s, discarding v^T 115 | * @param *m input matrix / where u is stored 116 | * @param *s entries of the diagonal matrix 117 | * @return LAPACK return 118 | */ 119 | int kjg_gsl_SVD (gsl_matrix * M, gsl_matrix * V, gsl_vector * S); 120 | 121 | #endif /* KJG_GSL_H_ */ 122 | -------------------------------------------------------------------------------- /src/egsubs.c: -------------------------------------------------------------------------------- 1 | #include "mcio.h" 2 | #include "egsubs.h" 3 | 4 | 5 | int 6 | makeeglist (char **eglist, int maxnumeg, Indiv ** indivmarkers, int numindivs) 7 | // old routine mkeglist 8 | { 9 | 10 | Indiv *indx; 11 | int i, k, numeg = 0; 12 | for (i = 0; i < numindivs; i++) { 13 | indx = indivmarkers[i]; 14 | if (indx->ignore) 15 | continue; 16 | k = indxindex (eglist, numeg, indx->egroup); 17 | if (k < 0) { 18 | if (numeg >= maxnumeg) { 19 | printf 20 | ("number of populations too large. Increase maxpops if you wish\n"); 21 | fatalx 22 | ("(makeeglist) You really want to analyse more than %d populations?\n", 23 | maxnumeg); 24 | } 25 | eglist[numeg] = strdup (indx->egroup); 26 | ++numeg; 27 | } 28 | } 29 | return numeg; 30 | } 31 | 32 | int 33 | mkeglist (Indiv ** indm, int numindivs, char **eglist) 34 | { 35 | Indiv *indx; 36 | int i, k, numeg = 0; 37 | for (i = 0; i < numindivs; i++) { 38 | indx = indm[i]; 39 | if (indx->ignore) 40 | continue; 41 | k = indxindex (eglist, numeg, indx->egroup); 42 | if (k < 0) { 43 | eglist[numeg] = strdup (indx->egroup); 44 | ++numeg; 45 | } 46 | } 47 | return numeg; 48 | } 49 | 50 | int 51 | loadlist_type (char **list, char *listname, int *ztypes, int off) 52 | // listname is just a list of names ... 53 | { 54 | FILE *lfile; 55 | char line[MAXSTR]; 56 | char *spt[MAXFF]; 57 | char *sx; 58 | Indiv *indx; 59 | int nsplit, i, n = 0, tt; 60 | 61 | if (listname == NULL) 62 | return 0; 63 | openit (listname, &lfile, "r"); 64 | while (fgets (line, MAXSTR, lfile) != NULL) { 65 | nsplit = splitup (line, spt, MAXFF); 66 | if (nsplit == 0) 67 | continue; 68 | sx = spt[0]; 69 | if (sx[0] == '#') { 70 | freeup (spt, nsplit); 71 | continue; 72 | } 73 | if (nsplit < 2) 74 | fatalx ("bad listname: %s\n", sx); 75 | list[n] = strdup (sx); 76 | tt = atoi (spt[1]); 77 | ztypes[n] = tt + off; 78 | ++n; 79 | freeup (spt, nsplit); 80 | } 81 | return n; 82 | } 83 | 84 | 85 | void 86 | seteglist (Indiv ** indm, int nindiv, char *eglistname) 87 | { 88 | FILE *egfile; 89 | char line[MAXSTR]; 90 | char *spt[MAXFF]; 91 | char *sx; 92 | Indiv *indx; 93 | int nsplit, i; 94 | 95 | if (eglistname == NULL) 96 | return; 97 | openit (eglistname, &egfile, "r"); 98 | while (fgets (line, MAXSTR, egfile) != NULL) { 99 | nsplit = splitup (line, spt, MAXFF); 100 | if (nsplit == 0) 101 | continue; 102 | sx = spt[0]; 103 | if (sx[0] == '#') 104 | continue; 105 | setstatus (indm, nindiv, sx); 106 | freeup (spt, nsplit); 107 | } 108 | fclose (egfile); 109 | } 110 | 111 | void 112 | seteglistv (Indiv ** indm, int nindiv, char *eglistname, int val) 113 | { 114 | FILE *egfile; 115 | char line[MAXSTR]; 116 | char *spt[MAXFF]; 117 | char *sx = NULL; 118 | Indiv *indx; 119 | int nsplit, i; 120 | 121 | if (eglistname == NULL) { 122 | setstatusv (indm, nindiv, NULL, val); 123 | } 124 | 125 | openit (eglistname, &egfile, "r"); 126 | while (fgets (line, MAXSTR, egfile) != NULL) { 127 | nsplit = splitup (line, spt, MAXFF); 128 | if (nsplit == 0) 129 | continue; 130 | sx = spt[0]; 131 | if (sx[0] == '#') 132 | continue; 133 | setstatusv (indm, nindiv, sx, val); 134 | freeup (spt, nsplit); 135 | } 136 | fclose (egfile); 137 | } 138 | -------------------------------------------------------------------------------- /include/strsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int splitup (char *strin, char *strpt[],int maxpt) ; 4 | int splitupx(char *strin, char **spt, int maxpt, char splitc) ; 5 | int splitupwxbuff(char *strin, char **spt, int maxpt, char *bigbuff, int bigbufflen) ; 6 | int splitupxbuff(char *strin, char **spt, int maxpt, char splitc, char *bigbuff, int bigbufflen) ; 7 | int oldsplitup (char *strin, char *strpt[],int maxpt) ; 8 | void freeup (char *strpt[],int numpt) ; 9 | int split1 (char *strin, char *strpt[], char splitc); 10 | int first_word(char *string, char *word, char *rest) ; 11 | char *fnwhite (char *ss) ; 12 | char *fwhite (char *ss) ; 13 | char *ftab (char *ss) ; 14 | int NPisnumber (char c) ; 15 | int isnumword (char *str) ; 16 | void fatalx( char *fmt, ...) ; 17 | long seednum() ; 18 | void printbl(int n) ; 19 | void printnl() ; 20 | void striptrail(char *sss, char c) ; 21 | void catx(char *sout, char **spt, int n) ; 22 | void catxx(char *sout, char **spt, int n) ; 23 | void catxc(char *sout, char **spt, int n, char c) ; 24 | void makedfn(char *dirname, char *fname, char *outname, int maxstr) ; 25 | int substring (char **ap, char *inx, char *outx) ; 26 | int numcols (char *name) ; 27 | int numlines(char *name) ; 28 | void openit(char *name, FILE **fff, char *type) ; 29 | int ftest(char *aname) ; 30 | int getxx(double **xx, int maxrow, int numcol, char *fname) ; 31 | int getss(char **ss, char *fname) ; 32 | int loadlist(char **list, char *listname) ; // with dup check 33 | void printdups(char **list, int n) ; 34 | int checkdup(char **list, int n) ; 35 | double clocktime() ; // cpu time in seconds 36 | void crevcomp(char *sout, char *sin) ; 37 | int indxstring(char **namelist, int len, char *strid) ; 38 | int indxstringr(char **namelist, int len, char *strid) ; 39 | char *strstrx(char *s1, char *s2) ; // case insensitive strstr 40 | int getxxnames(char ***pnames, double **xx, int maxrow, int numcol, char *fname); 41 | int getjjnames(char ***pnames, int **xx, int maxrow, int numcol, char *fname); 42 | int getxxnamesf(char ***pnames, double **xx, int maxrow, int numcol, FILE *fff) ; 43 | int getnameslohi(char ****pnames, int maxrow, int numcol, char *fname, int lo, int hi) ; 44 | int getnamesstripcolon(char ****pnames, int maxrow, int numcol, char *fname, int lo, int hi) ; 45 | int getnames(char ****pnames, int maxrow, int numcol, char *fname) ; 46 | char num2iub (int num) ; 47 | char revchar(char c) ; 48 | int iub2num(char c) ; 49 | char num2base (int num) ; 50 | int base2num(char c) ; 51 | char *int_string(int a, int len, int base) ; 52 | char *binary_string(int a, int len) ; 53 | int string_binary(char *sx) ; 54 | void freestring (char **ss) ; 55 | void copystrings(char **sa, char **sb, int n) ; 56 | void printstringsw(char **ss, int n, int slen, int width) ; 57 | void printstrings(char **ss, int n) ; 58 | int ridfile(char *fname) ; 59 | char compbase(char x) ; 60 | void mkupper(char *sx) ; 61 | void mklower(char *sx) ; 62 | int iubdekode(char *a, char iub) ; 63 | int isiub(char iub) ; 64 | int isiub2(char iub) ; 65 | int iubcbases(char *cbases, char iub) ; 66 | int ishet(char c) ; 67 | int cttype(char cc) ; 68 | int char2int(char cc) ; 69 | char int2char(int x) ; 70 | void chomp(char *str) ; 71 | 72 | int numcmatch(char *cc, int len, char c) ; 73 | int numcnomatch(char *cc, int len, char c) ; 74 | char *strnotchar(char *s, char c) ; 75 | char *findupper(char *s) ; 76 | char *fgetstrap(char *buff, int maxlen, FILE *fff, int *ret) ; 77 | char readtonl(FILE *fff) ; 78 | int filehash(char *name) ; 79 | 80 | 81 | 82 | 83 | #define ZALLOC(item,n,type) if ((item = (type *)calloc((n),sizeof(type))) == NULL) \ 84 | fatalx("Unable to allocate %ld unit(s) for item \n", (long) n) 85 | 86 | #undef MAX 87 | #undef MIN 88 | 89 | #define MAX(a,b) ( (a) < (b) ? (b) : (a) ) 90 | #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) 91 | #define YES 1 92 | #define NO 0 93 | #define TRUE 1 94 | #define FALSE 0 95 | #define CNULL '\0' 96 | #define CNL '\n' 97 | #define CTAB '\t' 98 | 99 | -------------------------------------------------------------------------------- /src/qmakef: -------------------------------------------------------------------------------- 1 | # ----- If the user defined OPENBLAS on the command line or use that 2 | ifdef OPENBLAS 3 | $(info $$OPENBLAS is [${OPENBLAS}]) 4 | else 5 | # ----- If this is Broad, check the path to see if "use .openblas-0.2.8" 6 | ifeq ($(DOMAINNAME),broadinstitute.org) 7 | $(info *** Broad Institute users can execute use .openblas-0.2.8 and use GCC-4.9 to link to OpenBLAS ***) 8 | else 9 | # ----- On orchestra this will work 10 | OPENBLAS=/opt/openblas/0.2.14 11 | $(info setting $$OPENBLAS to [${OPENBLAS}]) 12 | endif 13 | endif 14 | 15 | ifdef OPENBLAS 16 | CFLAGS = -I../include -I$(OPENBLAS)/include -D_GNU_SOURCE 17 | LDFLAGS = -L$(OPENBLAS)/lib -D_GNU_SOURCE 18 | LDLIBS = -Wl,-Bdynamic -lgsl -Wl,-Bstatic -lopenblas -Wl,-Bdynamic -lgfortran -pthread -lm 19 | else 20 | CFLAGS = -I../include -D_GNU_SOURCE 21 | LDFLAGS = -D_GNU_SOURCE 22 | LDLIBS = -lgsl -lopenblas -lgfortran -pthread -lm 23 | endif 24 | 25 | CC = gcc 26 | LD = ld 27 | 28 | OPENBLAS=/opt/openblas/0.2.14 29 | LDFLAGS = -L$(OPENBLAS)/lib -D_GNU_SOURCE -fopenmp 30 | 31 | ifeq ($(OPTIMIZE), 1) 32 | CFLAGS += -O2 33 | endif 34 | 35 | ifeq ($(DEBUG), 1) 36 | CFLAGS += -g # enable debugging 37 | endif 38 | 39 | ifeq ($(PROFILING), 1) 40 | CFLAGS += -pg # enable profiling 41 | endif 42 | 43 | CFLAGS = -I../include -I/opt/openblas/include -c -g -pg 44 | 45 | ND=nicksrc 46 | ED=eigensrc 47 | KG=ksrc 48 | NLIB = $(ND)/libnick.a 49 | 50 | # ----- phony targets 51 | .PHONY: all clean clobber install 52 | 53 | EXE = baseprog convertf mergeit pca newsm \ 54 | $(ED)/pcatoy $(ED)/smartrel $(ED)/smarteigenstrat \ 55 | $(ED)/twstats $(ED)/eigenstrat $(ED)/eigenstratQTL $(ED)/smartpca 56 | 57 | all: $(EXE) 58 | 59 | install: all 60 | mv $(EXE) ../bin 61 | 62 | clobber: 63 | rm -f *.o */*.o */*.a 64 | rm -f $(EXE) 65 | cd ../bin/ ; rm -f $(notdir, $(EXE)) ; cd ../src 66 | 67 | clean: 68 | rm -f *.o core core.* *.o 69 | 70 | # ----- build nicksrc/libnick.a 71 | $(NLIB): 72 | $(MAKE) -C $(ND) 73 | 74 | baseprog: baseprog.o mcio.o egsubs.o admutils.o h2d.o $(ED)/exclude.o $(NLIB) 75 | 76 | convertf: convertf.o mcio.o egsubs.o admutils.o h2d.o $(ED)/exclude.o $(NLIB) 77 | 78 | mergeit: mergeit.o mcio.o admutils.o $(NLIB) 79 | 80 | pca: pca.o $(ED)/eigsubs.o eigx.o $(NLIB) 81 | 82 | $(ED)/pcatoy: $(ED)/pcatoy.o eigensrc/eigsubs.o eigensrc/eigx.o $(NLIB) 83 | 84 | $(ED)/smartrel: $(ED)/smartrel.o twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o \ 85 | $(ED)/eigsubs.o $(ED)/eigx.o $(ED)/smartsubs.o $(NLIB) 86 | 87 | $(ED)/smarteigenstrat: $(ED)/smarteigenstrat.o mcio.o admutils.o $(NLIB) 88 | 89 | $(ED)/twstats: $(ED)/twstats.o $(NLIB) 90 | 91 | #$(ED)/eigenstrat: $(ED)/eigenstrat.o 92 | 93 | #$(ED)/eigenstratQTL: $(ED)/eigenstratQTL.o 94 | 95 | $(ED)/smartpca: $(ED)/smartpca.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \ 96 | twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \ 97 | $(NLIB) \ 98 | $(KG)/kjg_fpca.o $(KG)/kjg_gsl.o 99 | 100 | newsm: newsm.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \ 101 | twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \ 102 | $(NLIB) \ 103 | $(KG)/kjg_fpca.o $(KG)/kjg_gsl.o 104 | 105 | newsma: newsma.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \ 106 | twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \ 107 | $(NLIB) \ 108 | $(KG)/kjg_fpca.o $(KG)/kjg_gsl.o 109 | 110 | smshrink: smshrink.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \ 111 | twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \ 112 | $(NLIB) \ 113 | $(KG)/kjg_fpca.o $(KG)/kjg_gsl.o 114 | 115 | smartpca: $(ED)/smartpca 116 | cp $< $@ 117 | 118 | $(ED)/fffpca: $(ED)/fffpca.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \ 119 | twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \ 120 | $(NLIB) \ 121 | $(KG)/kjg_fpca.o $(KG)/kjg_gsl.o 122 | 123 | fffpca: $(ED)/fffpca 124 | cp $< $@ 125 | -------------------------------------------------------------------------------- /include/statsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "strsubs.h" 7 | 8 | #define CHI_EPSILON 0.000001 /* accuracy of critchi approximation */ 9 | #define CHI_MAX 99999.0 /* maximum chi square value */ 10 | 11 | #define LOG_SQRT_PI 0.5723649429247000870717135 /* log (sqrt (pi)) */ 12 | #define I_SQRT_PI 0.5641895835477562869480795 /* 1 / sqrt (pi) */ 13 | #define I_PI 0.3183098861837906715377675 /* 1 / pi */ 14 | #define PI 3.1415926535897932384626434 15 | #define BIGX 20.0 /* max value to represent exp (x) */ 16 | #define ex(x) (((x) < -BIGX) ? 0.0 : exp (x)) 17 | #define SQRT_PI (1.0/I_SQRT_PI) /* sqrt (pi) */ 18 | 19 | double medchi(int *cls, int len, int *n0, int *n1, double *kstail) ; 20 | double ks2(int *cls, int len, int *n0, int *n1, double *kstail) ; 21 | double probks(double lam) ; 22 | 23 | double nordis(double z) ; 24 | double ndens(double val, double mean, double sig) ; 25 | double ntail(double z) ; 26 | double zprob(double p) ; 27 | void setzptable() ; 28 | double z2x2(double *a) ; 29 | double conchi(double *a, int m, int n) ; 30 | double conchiv(double *a, int m, int n) ; 31 | double chitest(double *a, double *p, int n) ; 32 | double pi() ; 33 | 34 | double xlgamma(double x) ; 35 | double psi(double x) ; 36 | double tau(double x) ; 37 | double logbessi0(double x) ; 38 | double bessi0(double x) ; 39 | double logbessi1(double x) ; 40 | double bessi1(double x) ; 41 | void bernload() ; 42 | double bernum(int x) ; 43 | 44 | void mleg(double a1, double a2, double *p, double *lam) ; 45 | 46 | double dilog(double x) ; 47 | double li2(double x) ; 48 | 49 | double hwstat(double *x) ; 50 | 51 | double gammprob(double x, double p, double lam) ; 52 | double bprob(double p, double a, double b) ; 53 | double lbeta(double a, double b) ; 54 | double poissloglike(int kk, double mean) ; 55 | double dirmult(double *pp, int *aa, int len) ; 56 | double dawson(double t) ; 57 | 58 | double binomtail(int n, int t, double p, char c) ; 59 | double binlogtail(int n, int t, double p, char c) ; 60 | void genbin(double *a, int n, double p) ; 61 | void genlogbin(double *a, int n, double p) ; 62 | int ifirstgt(int val, int *tab, int n) ; 63 | int firstgt(double val, double *tab, int n) ; 64 | 65 | void cinterp(double val, double x0, double x1, 66 | double f0, double f0p, double f1, double f1p, double *fv, double *fvp) ; 67 | int firstgtx(double val, double *tab, int n) ; 68 | int jfirstgtx(int val, int *tab, int n) ; 69 | 70 | double rtlchsq(int df, double z) ; 71 | double critchi(int df, double z) ; 72 | double rtlf(int df1, int df2, double f) ; 73 | 74 | double ltlg(double a, double x) ; 75 | double rtlg(double a, double x) ; 76 | 77 | double twdens(double twstat) ; 78 | double twtail(double twstat) ; 79 | double twtailx(double twstat) ; 80 | double twdensx(double twstat) ; 81 | double twnorm(double lam, double p, double n) ; 82 | void twfree() ; 83 | int settwxtable(char *table) ; 84 | void gettw(double x, double *tailp, double *densp) ; 85 | double dotwcalc(double *lambda, int m, double *ptw, double *pzn, double *pzvar, int minm) ; 86 | int numgtz(double *a, int n) ; 87 | 88 | double betaix(double a, double b, double lo, double hi) ; 89 | double betai(double a, double b, double x) ; 90 | void bpars(double *a, double *b, double mean, double var) ; 91 | void bmoments(double a, double b, double *mean, double *var) ; 92 | double unbiasedest(int *ndx, int ndsize, int **counts) ; 93 | void weightjack(double *est, double *sig, double mean, double *jmean, double *jwt, int g) ; 94 | int modehprob(int n, int a, int m) ; 95 | void calcfc(double *c, int n, double rho) ; 96 | void circconv(double *xout, double *xa, double *xb, int n) ; 97 | 98 | double bino(int a, int b) ; 99 | void setbino(int maxbco) ; 100 | void destroy_bino() ; 101 | double exx(double x) ; 102 | double ubias(int a, int n, int k) ; 103 | double scx(double *W, double *mean, double *x, int d) ; // maybe should be in vsubs 104 | void dither(double *xout, double *xin, int n) ; 105 | void probit(double *xout, double *xin, int n) ; 106 | // Berk-Jones 107 | double bjugauss(double *p, double *u, double *a, int n) ; // return M_n (Berk-Jones) 108 | void bjasympt(double *ptail, double *mtail, double *tail, double mplus, double mminus, int n) ; 109 | void bj2(double *aa, double *bb, int a, int b, double *plpv, double *prpv, double *ppv) ; 110 | double genhp(double **hp, int a, int b) ; 111 | double genhpt(int a, int b, int *lt, int *rt) ; 112 | void gentail(double **ltail, double **rtail, double **hp, int a, int b) ; 113 | void setthresh(int *thresh, double **tail, int a, int b, double stat, int mode) ; 114 | void bj2x(int *type, int a, int b, double *plpv, double *prpv, double *ppv) ; 115 | 116 | -------------------------------------------------------------------------------- /src/ksrc/kjg_fpca.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kjg_fpca.c 3 | * 4 | * Created on: Apr 28, 2014 5 | * Author: Kevin 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "kjg_fpca.h" 16 | #include "kjg_gsl.h" 17 | 18 | #include "admutils.h" 19 | #include "gval.h" 20 | 21 | size_t KJG_FPCA_ROWS = 256; 22 | 23 | void 24 | kjg_fpca (size_t K, size_t L, size_t I, double *eval, double *evec) 25 | { 26 | if (K >= L) 27 | exit (1); 28 | if (I == 0) 29 | exit (1); 30 | 31 | size_t m = get_ncols (); 32 | size_t n = get_nrows (); 33 | 34 | // PART A - compute Q such that X ~ Q * (Q^T) * X 35 | gsl_matrix *G1 = gsl_matrix_alloc (n, L); 36 | gsl_matrix *G2 = gsl_matrix_alloc (n, L); 37 | gsl_matrix *Q = gsl_matrix_alloc (m, (I + 1) * L); 38 | gsl_matrix *Gswap; 39 | 40 | gsl_rng *r = kjg_gsl_rng_init (); 41 | kjg_gsl_ran_ugaussian_matrix (r, G1); 42 | gsl_rng_free (r); 43 | 44 | size_t i; 45 | for (i = 0; i < I; i++) 46 | { 47 | gsl_matrix_view Qi = gsl_matrix_submatrix (Q, 0, i * L, m, L); 48 | 49 | // do the multiplication 50 | kjg_fpca_XTXA (G1, &Qi.matrix, G2); 51 | 52 | // scale to prevent G2 from blowing up 53 | gsl_matrix_scale (G2, 1.0 / m); 54 | 55 | Gswap = G2; 56 | G2 = G1; 57 | G1 = Gswap; 58 | } 59 | 60 | gsl_matrix_view Qi = gsl_matrix_submatrix (Q, 0, I * L, m, L); 61 | kjg_fpca_XA (G1, &Qi.matrix); 62 | 63 | { 64 | gsl_matrix *V = gsl_matrix_alloc (Q->size2, Q->size2); 65 | gsl_vector *S = gsl_vector_alloc (Q->size2); 66 | 67 | kjg_gsl_SVD (Q, V, S); 68 | 69 | gsl_matrix_free (V); 70 | gsl_vector_free (S); 71 | } 72 | 73 | // kjg_gsl_matrix_QR(Q); // QR decomposition is less accurate than SVD 74 | 75 | gsl_matrix_free (G1); 76 | gsl_matrix_free (G2); 77 | 78 | // PART B - compute B matrix, take SVD and return 79 | gsl_matrix *B = gsl_matrix_alloc (n, (I + 1) * L); 80 | kjg_fpca_XTB (Q, B); 81 | 82 | gsl_matrix *Utilda = gsl_matrix_alloc ((I + 1) * L, (I + 1) * L); 83 | gsl_vector *Stilda = gsl_vector_alloc ((I + 1) * L); 84 | 85 | kjg_gsl_SVD (B, Utilda, Stilda); 86 | 87 | gsl_matrix_view Vk = gsl_matrix_submatrix (B, 0, 0, n, K); 88 | gsl_matrix_view evec_view = gsl_matrix_view_array (evec, n, K); 89 | gsl_matrix_memcpy (&evec_view.matrix, &Vk.matrix); 90 | 91 | gsl_vector_view Sk = gsl_vector_subvector (Stilda, 0, K); 92 | gsl_vector_view eval_view = gsl_vector_view_array (eval, K); 93 | gsl_vector_mul (&Sk.vector, &Sk.vector); 94 | gsl_vector_scale (&Sk.vector, 1.0 / m); 95 | gsl_vector_memcpy (&eval_view.vector, &Sk.vector); 96 | 97 | gsl_matrix_free (Q); 98 | gsl_matrix_free (B); 99 | gsl_matrix_free (Utilda); 100 | gsl_vector_free (Stilda); 101 | } 102 | 103 | void 104 | kjg_fpca_XTXA (const gsl_matrix * A1, gsl_matrix * B, gsl_matrix * A2) 105 | { 106 | size_t m = get_ncols (); 107 | size_t n = get_nrows (); 108 | 109 | size_t i, r; // row index 110 | double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS); // normalized 111 | 112 | gsl_matrix_view Bi, Xi; 113 | 114 | gsl_matrix_set_zero (A2); 115 | 116 | for (i = 0; i < m; i += KJG_FPCA_ROWS) 117 | { 118 | r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y); 119 | Xi = gsl_matrix_view_array (Y, r, n); 120 | Bi = gsl_matrix_submatrix (B, i, 0, r, B->size2); 121 | gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1, &Xi.matrix, A1, 0, 122 | &Bi.matrix); 123 | gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1, &Xi.matrix, &Bi.matrix, 124 | 1, A2); 125 | } 126 | 127 | free (Y); 128 | } 129 | 130 | void 131 | kjg_fpca_XA (const gsl_matrix * A, gsl_matrix * B) 132 | { 133 | size_t n = get_nrows (); 134 | size_t m = get_ncols (); 135 | 136 | size_t i, r; 137 | double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS); 138 | 139 | gsl_matrix_view Hmat, Xmat; 140 | 141 | gsl_matrix_set_zero (B); 142 | 143 | for (i = 0; i < m; i += KJG_FPCA_ROWS) 144 | { 145 | r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y); 146 | Xmat = gsl_matrix_view_array (Y, r, n); 147 | Hmat = gsl_matrix_submatrix (B, i, 0, r, B->size2); 148 | gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1, &Xmat.matrix, A, 0, 149 | &Hmat.matrix); 150 | } 151 | 152 | free (Y); 153 | } 154 | 155 | void 156 | kjg_fpca_XTB (const gsl_matrix * B, gsl_matrix * A) 157 | { 158 | size_t n = get_nrows (); 159 | size_t m = get_ncols (); 160 | 161 | size_t i, r; 162 | double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS); 163 | gsl_matrix_view Xmat; 164 | 165 | gsl_matrix_set_zero (A); 166 | 167 | for (i = 0; i < m; i += KJG_FPCA_ROWS) 168 | { 169 | r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y); 170 | Xmat = gsl_matrix_view_array (Y, r, n); 171 | gsl_matrix_const_view Hmat = gsl_matrix_const_submatrix (B, i, 0, r, 172 | B->size2); 173 | gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1, &Xmat.matrix, &Hmat.matrix, 174 | 1, A); 175 | } 176 | 177 | free (Y); 178 | } 179 | -------------------------------------------------------------------------------- /src/gval.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "admutils.h" 15 | #include "mcio.h" 16 | #include "gval.h" 17 | 18 | static SNP **xxsnps = NULL; 19 | static Indiv **xindivmarkers = NULL; 20 | static int xnrows, xncols; 21 | static int xnumindivs; 22 | static int *xxindex = NULL; 23 | static double *xmean, *xfancy; 24 | static double **gtable; 25 | 26 | void 27 | setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs, 28 | int *xindex, int *xtypes, int ncols) 29 | { 30 | 31 | double *cc; 32 | int t, n0, n1, i, k, col; 33 | SNP *cupt; 34 | double mean, y; 35 | 36 | unsetgval (); 37 | 38 | xxsnps = xsnps; 39 | xnrows = nrows; 40 | xncols = ncols; 41 | xindivmarkers = indivmarkers; 42 | xnumindivs = numindivs; 43 | xxindex = xindex; 44 | 45 | for (i = 1; i < nrows; i++) { 46 | if (xxindex[i] < xxindex[i - 1]) { 47 | fprintf (stderr, "xindex not sorted\n"); 48 | exit (1); 49 | } 50 | } 51 | 52 | ZALLOC (cc, nrows, double); 53 | ZALLOC (xmean, ncols, double); 54 | ZALLOC (xfancy, ncols, double); 55 | vclear (xfancy, 1.0, ncols); 56 | gtable = initarray_2Ddouble (ncols, 4, 0); 57 | 58 | for (i = 0; i < ncols; ++i) { 59 | col = i; 60 | cupt = xsnps[i]; 61 | 62 | /** 63 | if (i>=0) { 64 | printf("zz: %d %s\n", cupt -> ID) ; fflush(stdout) ; 65 | } 66 | */ 67 | getcolxz (cc, cupt, xindex, xtypes, nrows, i, xmean, xfancy, &n0, &n1); 68 | 69 | mean = xmean[col] / xfancy[col]; 70 | for (k = 0; k < 3; ++k) { 71 | y = ((double) k) - mean; 72 | y *= xfancy[col]; 73 | gtable[col][k] = y / sqrt (2.0); 74 | } 75 | gtable[col][3] = 0; 76 | 77 | t = MIN (n0, n1); 78 | if (t == 0) 79 | cupt->ignore = YES; // side-effect 80 | } 81 | 82 | free (cc); 83 | } 84 | 85 | void 86 | unsetgval () 87 | { 88 | if (xxsnps == NULL) 89 | return; 90 | 91 | xxsnps = NULL; 92 | xindivmarkers = NULL; 93 | xxindex = NULL; 94 | 95 | free2D (>able, xncols); 96 | 97 | gtable = NULL; 98 | 99 | free (xmean); 100 | free (xfancy); 101 | } 102 | 103 | int 104 | getgval (int row, int col, double *val) 105 | { 106 | 107 | /** 108 | if (row>=xnrows) fatalx("row index overflow\n") ; 109 | if (col>=xncols) fatalx("col index overflow\n") ; 110 | */ 111 | 112 | return getggval (xxindex[row], col, val); 113 | 114 | } 115 | 116 | int 117 | getggval (int indindx, int col, double *val) 118 | // indindex is index in full array 119 | { 120 | SNP *cupt; 121 | int t, z; 122 | double y, mean; 123 | 124 | *val = 0; 125 | if (xindivmarkers[indindx]->ignore) 126 | return -1; 127 | cupt = xxsnps[col]; 128 | t = getgtypes (cupt, indindx); 129 | if (t < 0) 130 | return t; 131 | 132 | *val = gtable[col][t]; 133 | return t; 134 | 135 | } 136 | 137 | // Unpack lookup table 138 | 139 | // macro to unpack a single byte 140 | #define U(n) { ((n) >> 6) & 3, ((n) >> 4) & 3, ((n) >> 2) & 3, (n) & 3 } 141 | 142 | // macros to build the u(n)packi(n)g table 143 | #define U1(n) U(n), U((n) + 1), U((n) + 2), U((n) + 3) 144 | #define U2(n) U1(n), U1((n) + 4), U1((n) + 8), U1((n) + 12) 145 | #define U3(n) U2(n), U2((n) + 16), U2((n) + 32), U2((n) + 48) 146 | 147 | // the unpacking table 148 | static const uint8_t UL[256][4] = { U3 (0), U3 (64), U3 (128), U3 (192) }; 149 | 150 | size_t 151 | get_nrows () 152 | { 153 | return (xnrows); 154 | } 155 | 156 | size_t 157 | get_ncols () 158 | { 159 | return (xncols); 160 | } 161 | 162 | /** 163 | * Unpacks a SNP column 164 | * @param snp_index 165 | * @param *y arrayref to store data 166 | */ 167 | void 168 | kjg_geno_get_normalized_row (const size_t snp_index, double *y) 169 | { 170 | uint8_t *packed = xxsnps[snp_index]->pbuff; 171 | double *norm_lookup = gtable[snp_index]; 172 | 173 | size_t i = 0, j = xxindex[i]; 174 | while (1) { 175 | size_t k = j / 4; // packed location 176 | size_t jf = (k + 1) * 4; // last index in packed location 177 | 178 | uint8_t p = packed[k]; // packed data 179 | const uint8_t *u = UL[p]; // unpacked data 180 | 181 | while (j < jf) { 182 | size_t o = j % 4; // offset in packed data 183 | size_t t = u[o]; // unpacked data 184 | y[i] = norm_lookup[t]; // normalized data 185 | 186 | if (++i == xnrows) // move onto next entry 187 | return; // break if we are done with SNP 188 | j = xxindex[i]; // perform the lookup 189 | } 190 | } 191 | } 192 | 193 | /** 194 | * Unpacks several SNP coluns 195 | * @param snp_index index of the SNP 196 | * @param *unpacked arrayref to store data 197 | */ 198 | 199 | size_t 200 | kjg_geno_get_normalized_rows (const size_t i, const size_t r, double *Y) 201 | { 202 | size_t j; 203 | for (j = i; j < i + r && j < xncols; j++) { 204 | kjg_geno_get_normalized_row (j, Y); 205 | Y += xnrows; 206 | } 207 | return (j - i); 208 | } 209 | -------------------------------------------------------------------------------- /src/ksrc/kjg_gsl.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include "kjg_gsl.h" 12 | 13 | void 14 | kjg_gsl_matrix_fprintf (FILE * stream, gsl_matrix * m, const char *template) 15 | { 16 | size_t i, j; 17 | for (i = 0; i < m->size1; i++) 18 | { 19 | fprintf (stream, template, gsl_matrix_get (m, i, 0)); 20 | for (j = 1; j < m->size2; j++) 21 | { 22 | fprintf (stream, "\t"); 23 | fprintf (stream, template, gsl_matrix_get (m, i, j)); 24 | } 25 | fprintf (stream, "\n"); 26 | } 27 | } 28 | 29 | void 30 | kjg_gsl_matrix_fscanf (FILE * stream, gsl_matrix * m) 31 | { 32 | size_t i, j; 33 | double x; 34 | for (i = 0; i < m->size1; i++) 35 | { 36 | for (j = 0; j < m->size2; j++) 37 | { 38 | fscanf (stream, "%lg", &x); 39 | gsl_matrix_set (m, i, j, x); 40 | } 41 | } 42 | } 43 | 44 | void 45 | kjg_gsl_evec_fprintf (FILE * stream, 46 | gsl_vector * eval, 47 | gsl_matrix * evec, const char *template) 48 | { 49 | size_t i, j; 50 | fprintf (stream, "#"); 51 | fprintf (stream, template, gsl_vector_get (eval, 0)); 52 | for (i = 1; i < eval->size; i++) 53 | { 54 | fprintf (stream, "\t"); 55 | fprintf (stream, template, gsl_vector_get (eval, i)); 56 | } 57 | fprintf (stream, "\n"); 58 | kjg_gsl_matrix_fprintf (stream, evec, template); 59 | } 60 | 61 | int 62 | kjg_gsl_evec_fscanf (FILE * stream, gsl_vector * eval, gsl_matrix * evec) 63 | { 64 | size_t i, j; 65 | int r; 66 | double x; 67 | 68 | r = fscanf (stream, "#%lg", &x); 69 | if (r != 1) 70 | return (r); 71 | gsl_vector_set (eval, 0, x); 72 | 73 | for (i = 1; i < eval->size; i++) 74 | { 75 | r = fscanf (stream, "%lg", &x); 76 | if (r != 1) 77 | return (r); 78 | gsl_vector_set (eval, i, x); 79 | } 80 | 81 | for (i = 0; i < evec->size1; i++) 82 | { 83 | for (j = 0; j < evec->size2; j++) 84 | { 85 | r = fscanf (stream, "%lg", &x); 86 | if (r != 1) 87 | return (r); 88 | gsl_matrix_set (evec, i, j, x); 89 | } 90 | } 91 | 92 | return (0); 93 | } 94 | 95 | gsl_rng * 96 | kjg_gsl_rng_init () 97 | { 98 | const gsl_rng_type *T; 99 | gsl_rng *r; 100 | extern long seed; 101 | 102 | gsl_rng_env_setup (); 103 | 104 | gsl_rng_default_seed = seed; 105 | 106 | T = gsl_rng_default; 107 | r = gsl_rng_alloc (T); 108 | 109 | // fprintf (stderr, "generator type: %s\n", gsl_rng_name (r)); 110 | // fprintf (stderr, "seed = %lu\n", gsl_rng_default_seed); 111 | 112 | return (r); 113 | } 114 | 115 | int 116 | kjg_gsl_matrix_frobenius_normalize (gsl_matrix * m) 117 | { 118 | double s = kjg_gsl_dlange ('F', m); 119 | double d = m->size1 * m->size2; 120 | return (gsl_matrix_scale (m, d / s)); 121 | } 122 | 123 | double 124 | kjg_gsl_dlange (const char norm, const gsl_matrix * m) 125 | { 126 | return (LAPACKE_dlange (LAPACK_ROW_MAJOR, norm, m->size1, m->size2, m->data, 127 | m->tda)); 128 | } 129 | 130 | int 131 | kjg_gsl_dgeqrf (gsl_matrix * m, gsl_vector * tau) 132 | { 133 | return (LAPACKE_dgeqrf (LAPACK_ROW_MAJOR, m->size1, m->size2, m->data, 134 | m->tda, tau->data)); 135 | } 136 | 137 | int 138 | kjg_gsl_dorgqr (gsl_matrix * m, gsl_vector * tau) 139 | { 140 | return (LAPACKE_dorgqr (LAPACK_ROW_MAJOR, m->size2, m->size2, m->size2, 141 | m->data, m->tda, tau->data)); 142 | } 143 | 144 | void 145 | kjg_gsl_ran_ugaussian_pair (const gsl_rng * r, double x[2]) 146 | { 147 | double r2; 148 | 149 | do 150 | { 151 | /* choose x,y in uniform square (-1,-1) to (+1,+1) */ 152 | x[0] = -1 + 2 * gsl_rng_uniform_pos (r); 153 | x[1] = -1 + 2 * gsl_rng_uniform_pos (r); 154 | 155 | /* see if it is in the unit circle */ 156 | r2 = x[0] * x[0] + x[1] * x[1]; 157 | } 158 | while (r2 > 1.0 || r2 == 0); 159 | 160 | r2 = sqrt (-2.0 * log (r2) / r2); 161 | 162 | x[0] *= r2; 163 | x[1] *= r2; 164 | } 165 | 166 | void 167 | kjg_gsl_ran_ugaussian_matrix (const gsl_rng * r, gsl_matrix * m) 168 | { 169 | size_t i, j; 170 | double *data; 171 | double x, y, r2; 172 | 173 | for (i = 0; i < m->size1; i++) 174 | { 175 | data = gsl_matrix_ptr (m, i, 0); 176 | 177 | for (j = 0; j < m->size2 - 1; j += 2) 178 | { 179 | kjg_gsl_ran_ugaussian_pair (r, data); 180 | data += 2; 181 | } 182 | 183 | if (m->size2 % 2) 184 | *data = gsl_rng_uniform_pos (r); 185 | } 186 | } 187 | 188 | void 189 | kjg_gsl_matrix_QR (gsl_matrix * m) 190 | { 191 | gsl_vector *tau = gsl_vector_alloc (m->size2); 192 | kjg_gsl_dgeqrf (m, tau); 193 | kjg_gsl_dorgqr (m, tau); 194 | gsl_vector_free (tau); 195 | } 196 | 197 | int 198 | kjg_gsl_SVD (gsl_matrix * M, gsl_matrix * V, gsl_vector * S) 199 | { 200 | size_t big_enough = M->size1 + V->size2; 201 | double *superb = malloc (big_enough * sizeof (double)); 202 | double *U; 203 | int info = LAPACKE_dgesvd (LAPACK_ROW_MAJOR, // row major 204 | 'O', 'S', M->size1, M->size2, M->data, M->tda, 205 | S->data, U, 206 | big_enough, V->data, V->tda, superb); 207 | free (superb); 208 | return (info); 209 | } 210 | -------------------------------------------------------------------------------- /bin/smartpca.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use Getopt::Std ; 4 | use File::Basename ; 5 | 6 | ### process flags 7 | # -w poplist is compute eigenvectors using populations in poplist, then project 8 | # -y poplistplot is use populations in poplistplot for plot 9 | # -z badsnpfile is use badsnpname: badsnpfile in call to smartpca 10 | my @flaglist = ("i","a","b","k","o","p","e","l","m","q","t","s","w","y","z"); 11 | $x = @ARGV; 12 | for($n=0; $n<$x; $n++) 13 | { 14 | foreach $flag (@flaglist) 15 | { 16 | if($ARGV[$n] eq "-$flag") { $specified{$flag} = 1; } 17 | } 18 | } 19 | foreach $flag ("i","a","b","o","p","e","l") 20 | { 21 | unless($specified{$flag}) { die("OOPS -$flag flag not specified"); } 22 | } 23 | getopts('i:a:b:k:o:p:e:l:m:t:s:w:y:z:q:',\%opts); 24 | $i = $opts{"i"}; 25 | $a = $opts{"a"}; 26 | $b = $opts{"b"}; 27 | $k = 10; if($specified{"k"}) { $k = $opts{"k"}; } 28 | $o = $opts{"o"}; 29 | $q = 0; if($specified{"q"}) { $q = $opts{"q"}; } 30 | $p = $opts{"p"}; 31 | $e = $opts{"e"}; 32 | $l = $opts{"l"}; 33 | $m = 5; if($specified{"m"}) { $m = $opts{"m"}; } 34 | $t = 10; if($specified{"t"}) { $t = $opts{"t"}; } 35 | $s = 6.0; if($specified{"s"}) { $s = $opts{"s"}; } 36 | if($specified{"w"}) { $w = $opts{"w"}; } 37 | if($specified{"y"}) { $y = $opts{"y"}; } 38 | if($specified{"z"}) { $z = $opts{"z"}; } 39 | 40 | ### run smartpca 41 | $parfile = "$o.par"; 42 | $evec = "$o.evec"; 43 | open(PARFILE,">$parfile") || die("OOPS couldn't open file $parfile for writing"); 44 | print PARFILE ("genotypename: $i\n"); 45 | print PARFILE ("snpname: $a\n"); 46 | print PARFILE ("indivname: $b\n"); 47 | print PARFILE ("evecoutname: $evec\n"); 48 | print PARFILE ("evaloutname: $e\n"); 49 | print PARFILE ("altnormstyle: NO\n"); 50 | print PARFILE ("numoutevec: $k\n"); 51 | print PARFILE ("numoutlieriter: $m\n"); 52 | print PARFILE ("numoutlierevec: $t\n"); 53 | print PARFILE ("outliersigmathresh: $s\n"); 54 | print PARFILE ("qtmode: $q\n"); 55 | if($specified{"w"}) { print PARFILE ("poplistname: $w\n"); } 56 | if($specified{"z"}) { print PARFILE ("badsnpname: $z\n"); } 57 | close(PARFILE); 58 | $command = "smartpca"; # MUST put bin directory in path 59 | $command .= " -p $parfile >$l"; 60 | print("$command\n"); 61 | system("$command"); 62 | 63 | ### make string of populations for ploteig 64 | $popstring = ""; 65 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); 66 | while($line = ) 67 | { 68 | chomp($line); 69 | my @array = split(/[\t ]+/,$line); 70 | $x = @array; 71 | if($array[1] =~ /eigvals/) { next; } # eigvals header line 72 | $pop = $array[$x-1]; 73 | if($popfound{$pop}) { next; } 74 | $popstring = $popstring . "$pop:"; 75 | $popfound{$pop} = 1; 76 | } 77 | close(EVEC); 78 | chop($popstring); # remove last ":" 79 | 80 | if($specified{"y"}) 81 | { 82 | ### make string of populations for ploteig based on -y flag input 83 | $popstring = ""; 84 | open(Y,$y) || die("COF"); 85 | while($line = ) 86 | { 87 | chomp($line); 88 | $popstring .= "$line:"; 89 | } 90 | chop($popstring); 91 | } 92 | 93 | ### cax ploteig 94 | $command = "ploteig"; # MUST put bin directory in path 95 | $command .= " -i $evec"; 96 | $command .= " -c 1:2 "; 97 | $command .= " -p $popstring "; 98 | $command .= " -x "; 99 | $command .= " -y "; 100 | $command .= " -o $p.xtxt "; # must end in .xtxt 101 | print("$command\n"); 102 | system("$command"); 103 | 104 | ### translate .evec file to .pca file expected by eigenstrat program 105 | ### Note: .evec file does not contain entries for outliers 106 | ### .pca file does contain entries (set to all 0.0) for outliers 107 | 108 | # ----- If this looks like a PLINK run, call the PLINK kludge 109 | if ( $i =~ m/\.ped$/ || $i =~ m/\.PED/ ) { 110 | $command = "evec2pca-ped.perl $k $evec $b $o"; 111 | } 112 | else { 113 | $command = "evec2pca.perl $k $evec $b $o"; 114 | } 115 | print("$command\n"); 116 | system("$command"); 117 | 118 | ### If labels are Case and Control only, compute correlations between 119 | ### Case/Control status and each eigenvector. Append to logfile. 120 | if(($popstring eq "Case:Control") || ($popstring eq "Control:Case")) 121 | { 122 | open(LOG,">>$l") || die("OOPS couldn't open file $l for appending"); 123 | print LOG ("\n"); 124 | for($x=0; $x<$k; $x++) # compute correlation for evec $x 125 | { 126 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); 127 | $sum1=0; $sumx=0; $sumxx=0; $sumy=0; $sumyy=0; $sumxy=0; 128 | $line = ; chomp($line); # eigvals line 129 | while($line = ) 130 | { 131 | chomp($line); 132 | my @array = split(/[\t ]+/,$line); 133 | $this = $array[2+$x]; 134 | $sumy += $this; 135 | $sumyy += $this*$this; 136 | $sum1 += 1; 137 | if($line =~ /Case/) # Case is 1, Control is 0 138 | { 139 | $sumx += 1; 140 | $sumxx += 1; 141 | $sumxy += $this; 142 | } 143 | } 144 | close(EVEC); 145 | $meanx = $sumx/$sum1; 146 | $meany = $sumy/$sum1; 147 | if($sum1 == 0) { next; } 148 | $sdevx = sqrt($sumxx/$sum1 - $meanx*$meanx); 149 | $sdevy = sqrt($sumyy/$sum1 - $meany*$meany); 150 | if($sdevx * $sdevy == 0) { next; } 151 | $corr = ($sumxy/$sum1) / ($sdevx*$sdevy); 152 | $x1 = $x+1; 153 | printf LOG ("Correlation between eigenvector $x1 (of $k) and Case/Control status is %.03f\n",$corr); 154 | } 155 | close(LOG); 156 | } 157 | -------------------------------------------------------------------------------- /include/twtable.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file twtable.h 3 | * @brief Tracy-Widom distribution 4 | */ 5 | 6 | #ifndef INCLUDE_TWTABLE_H_ 7 | #define INCLUDE_TWTABLE_H_ 8 | const double TWXVAL[] = 9 | { -8.0, -7.9, -7.8, -7.7, -7.6, -7.5, -7.4, -7.3, -7.2, -7.1, -7.0, -6.9, 10 | -6.8, -6.7, -6.6, -6.5, -6.4, -6.3, -6.2, -6.1, -6.0, -5.9, -5.8, -5.7, -5.6, -5.5, -5.4, -5.3, 11 | -5.2, -5.1, -5.0, -4.9, -4.8, -4.7, -4.6, -4.5, -4.4, -4.3, -4.2, -4.1, -4.0, -3.9, -3.8, -3.7, 12 | -3.6, -3.5, -3.4, -3.3, -3.2, -3.1, -3.0, -2.9, -2.8, -2.7, -2.6, -2.5, -2.4, -2.3, -2.2, -2.1, 13 | -2.0, -1.9, -1.8, -1.7, -1.6, -1.5, -1.4, -1.3, -1.2, -1.1, -1.0, -0.9, -0.8, -0.7, -0.6, -0.5, 14 | -0.4, -0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 15 | 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 16 | 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5.0, 5.1, 17 | 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6.0, 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7.0, 18 | 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8.0 19 | }; 20 | 21 | const double TWXTAIL[] = 22 | { 1.000000000, 1.000000000, 1.000000000, 1.000000000, 1.000000000, 23 | 1.000000000, 1.000000000, 0.999999999, 0.999999999, 0.999999997, 0.999999995, 0.999999989, 24 | 0.999999978, 0.999999958, 0.999999920, 0.999999849, 0.999999723, 0.999999498, 0.999999105, 25 | 0.999998431, 0.999997293, 0.999995401, 0.999992309, 0.999987331, 0.999979441, 0.999967125, 26 | 0.999948187, 0.999919496, 0.999876655, 0.999813597, 0.999722082, 0.999591101, 0.999406175, 27 | 0.999148569, 0.998794427, 0.998313849, 0.997669962, 0.996818016, 0.995704571, 0.994266851, 28 | 0.992432322, 0.990118582, 0.987233631, 0.983676579, 0.979338843, 0.974105853, 0.967859270, 29 | 0.960479677, 0.951849687, 0.941857369, 0.930399881, 0.917387157, 0.902745495, 0.886420892, 30 | 0.868381957, 0.848622271, 0.827162053, 0.804049066, 0.779358684, 0.753193114, 0.725679802, 31 | 0.696969061, 0.667231036, 0.636652122, 0.605430961, 0.573774198, 0.541892124, 0.509994383, 32 | 0.478285870, 0.446962951, 0.416210105, 0.386197065, 0.357076521, 0.328982392, 0.302028689, 33 | 0.276308949, 0.251896179, 0.228843301, 0.207183986, 0.186933854, 0.168091934, 0.150642330, 34 | 0.134556018, 0.119792709, 0.106302721, 0.094028817, 0.082907953, 0.072872924, 0.063853860, 35 | 0.055779577, 0.048578763, 0.042180992, 0.036517582, 0.031522284, 0.027131832, 0.023286351, 36 | 0.019929640, 0.017009350, 0.014477062, 0.012288293, 0.010402429, 0.008782605, 0.007395547, 37 | 0.006211384, 0.005203434, 0.004347977, 0.003624031, 0.003013114, 0.002499018, 0.002067590, 38 | 0.001706520, 0.001405143, 0.001154255, 0.000945945, 0.000773431, 0.000630927, 0.000513508, 39 | 0.000416999, 0.000337871, 0.000273152, 0.000220344, 0.000177359, 0.000142452, 0.000114170, 40 | 0.000091308, 0.000072871, 0.000058035, 0.000046124, 0.000036582, 0.000028955, 0.000022872, 41 | 0.000018030, 0.000014185, 0.000011138, 0.000008728, 0.000006826, 0.000005328, 0.000004151, 42 | 0.000003228, 0.000002505, 0.000001941, 0.000001501, 0.000001158, 0.000000892, 0.000000686, 43 | 0.000000527, 0.000000403, 0.000000308, 0.000000235, 0.000000179, 0.000000136, 0.000000104, 44 | 0.000000079, 0.000000059, 0.000000045, 0.000000034, 0.000000025, 0.000000019, 0.000000014, 45 | 0.000000011, 0.000000008 46 | }; 47 | 48 | const double TWXPDF[] = 49 | { 0.000000000, 0.000000000, 0.000000000, 0.000000000, 0.000000000, 50 | 0.000000001, 0.000000002, 0.000000005, 0.000000010, 0.000000019, 0.000000039, 0.000000076, 51 | 0.000000146, 0.000000276, 0.000000511, 0.000000932, 0.000001670, 0.000002942, 0.000005097, 52 | 0.000008683, 0.000014554, 0.000024005, 0.000038969, 0.000062279, 0.000098012, 0.000151923, 53 | 0.000231995, 0.000349097, 0.000517756, 0.000757035, 0.001091485, 0.001552137, 0.002177466, 54 | 0.003014256, 0.004118267, 0.005554591, 0.007397591, 0.009730295, 0.012643159, 0.016232112, 55 | 0.020595851, 0.025832397, 0.032034971, 0.039287325, 0.047658716, 0.057198759, 0.067932445, 56 | 0.079855636, 0.092931337, 0.107087044, 0.122213418, 0.138164458, 0.154759279, 0.171785501, 57 | 0.189004169, 0.206156009, 0.222968755, 0.239165233, 0.254471803, 0.268626779, 0.281388431, 58 | 0.292542221, 0.301906945, 0.309339558, 0.314738516, 0.318045543, 0.319245849, 0.318366852, 59 | 0.315475570, 0.310674866, 0.304098784, 0.295907232, 0.286280263, 0.275412215, 0.263505933, 60 | 0.250767272, 0.237400053, 0.223601597, 0.209558915, 0.195445624, 0.181419571, 0.167621190, 61 | 0.154172511, 0.141176787, 0.128718659, 0.116864772, 0.105664756, 0.095152500, 0.085347620, 62 | 0.076257058, 0.067876743, 0.060193257, 0.053185457, 0.046826015, 0.041082856, 0.035920459, 63 | 0.031301023, 0.027185487, 0.023534398, 0.020308645, 0.017470054, 0.014981856, 0.012809046, 64 | 0.010918644, 0.009279861, 0.007864200, 0.006645482, 0.005599836, 0.004705636, 0.003943413, 65 | 0.003295741, 0.002747112, 0.002283795, 0.001893694, 0.001566204, 0.001292071, 0.001063253, 66 | 0.000872795, 0.000714702, 0.000583831, 0.000475784, 0.000386816, 0.000313749, 0.000253894, 67 | 0.000204987, 0.000165125, 0.000132716, 0.000106431, 0.000085163, 0.000067996, 0.000054172, 68 | 0.000043066, 0.000034164, 0.000027045, 0.000021365, 0.000016843, 0.000013250, 0.000010403, 69 | 0.000008151, 0.000006374, 0.000004974, 0.000003874, 0.000003011, 0.000002336, 0.000001809, 70 | 0.000001398, 0.000001078, 0.000000830, 0.000000638, 0.000000489, 0.000000375, 0.000000286, 71 | 0.000000218, 0.000000166, 0.000000126, 0.000000096, 0.000000073, 0.000000055, 0.000000041, 72 | 0.000000031, 0.000000023 73 | }; 74 | 75 | const int TWTABSIZE = 161; 76 | 77 | #endif /* INCLUDE_TWTABLE_H_ */ 78 | -------------------------------------------------------------------------------- /src/eigensrc/eigsubs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | 9 | /* ********************************************************************* */ 10 | void eigx_ (double *pmat, double *ev, int *n); 11 | void eigxv_ (double *pmat, double *eval, double *evec, int *n); 12 | void cdc_ (double *pmat, int *n); 13 | void inverse_ (double *pmat, int *n); 14 | void solve_ (double *pmat, double *v, int *n); 15 | void geneigsolve_ (double *pmat, double *qmat, double *eval, int *n); 16 | 17 | void packsym (double *pmat, double *mat, int n); 18 | 19 | 20 | void 21 | eigvals (double *mat, double *evals, int n) 22 | { 23 | double *pmat; 24 | int len; 25 | 26 | len = n * (n + 1); 27 | len /= 2; 28 | ZALLOC (pmat, len, double); 29 | 30 | vst (mat, mat, -1.0, n * n); 31 | packsym (pmat, mat, n); 32 | eigx_ (pmat, evals, &n); 33 | free (pmat); 34 | vst (mat, mat, -1.0, n * n); 35 | vst (evals, evals, -1.0, n); 36 | } 37 | 38 | void 39 | eigvecs (double *mat, double *evals, double *evecs, int n) 40 | { 41 | double *pmat; 42 | int len; 43 | 44 | len = n * (n + 1); 45 | len /= 2; 46 | ZALLOC (pmat, len, double); 47 | 48 | vst (mat, mat, -1.0, n * n); 49 | packsym (pmat, mat, n); 50 | 51 | eigxv_ (pmat, evals, evecs, &n); 52 | free (pmat); 53 | vst (mat, mat, -1.0, n * n); 54 | vst (evals, evals, -1.0, n); 55 | } 56 | 57 | /* note: dpotrf requires the entire matrix, not packed lower-tri */ 58 | void 59 | chdecomp (double *mat, int n) 60 | { 61 | /* symetric matrix - don't need to 62 | * convert to column major order */ 63 | 64 | cdc_ (mat, &n); 65 | } 66 | 67 | void 68 | inverse (double *mat, int n) 69 | { 70 | int i, j; 71 | 72 | /* convert to column-major order */ 73 | for (i = 0; i < n; i++) { 74 | for (j = 0; j < i; j++) { 75 | double t = mat[n * i + j]; 76 | mat[n * i + j] = mat[n * j + i]; 77 | mat[n * j + i] = t; 78 | } 79 | } 80 | 81 | /*** DEBUGGING: ***/ 82 | { 83 | FILE *fid = fopen ("eigsubs.dbg", "a"); 84 | fprintf (fid, "matrix U\n"); 85 | for (i = 0; i < n; i++) { 86 | for (j = 0; j < n; j++) { 87 | fprintf (fid, "%8.4f ", mat[i * n + j]); 88 | } 89 | fprintf (fid, "\n"); 90 | } 91 | } 92 | /*******************/ 93 | 94 | inverse_ (mat, &n); 95 | 96 | /*** DEBUGGING: ***/ 97 | { 98 | FILE *fid = fopen ("eigsubs.dbg", "a"); 99 | fprintf (fid, "inverse of matrix U\n"); 100 | for (i = 0; i < n; i++) { 101 | for (j = 0; j < n; j++) { 102 | fprintf (fid, "%8.4f ", mat[i * n + j]); 103 | } 104 | fprintf (fid, "\n"); 105 | } 106 | } 107 | /*******************/ 108 | 109 | for (i = 0; i < n; i++) { 110 | for (j = 0; j < i; j++) { 111 | double t = mat[n * i + j]; 112 | mat[n * i + j] = mat[n * j + i]; 113 | mat[n * j + i] = t; 114 | } 115 | } 116 | } 117 | 118 | void 119 | solve (double *mat, double *b, double *v, int n) 120 | { 121 | int i, j; 122 | 123 | double *mat2 = (double *) malloc (n * n * sizeof (double)); 124 | 125 | /* lapack is going to put the lu-decomp into the matrix, 126 | * so make a copy and convert to column-major order */ 127 | for (i = 0; i < n; i++) { 128 | for (j = 0; j < n; j++) { 129 | mat2[n * i + j] = mat[n * j + i]; 130 | } 131 | } 132 | 133 | /* copy b into v */ 134 | for (i = 0; i < n; i++) { 135 | v[i] = b[i]; 136 | } 137 | 138 | solve_ (mat2, v, &n); 139 | 140 | free (mat2); 141 | return; 142 | } 143 | 144 | void 145 | packsym (double *pmat, double *mat, int n) 146 | // lapack L mode (fortran) 147 | { 148 | int i, j, k = 0; 149 | for (i = 0; i < n; i++) { 150 | for (j = i; j < n; j++) { 151 | pmat[k] = mat[i * n + j]; 152 | ++k; 153 | } 154 | } 155 | } 156 | 157 | void 158 | geneigsolve (double *pmat, double *qmat, double *evec, double *eval, int n) 159 | { 160 | 161 | /* save copy of A and B, which LAPACK will overwrite */ 162 | double *amat = (double *) malloc (n * n * sizeof (double)); 163 | double *bmat = (double *) malloc (n * n * sizeof (double)); 164 | 165 | int i, j; 166 | for (i = 0; i < n * n; i++) { 167 | amat[i] = pmat[i]; 168 | bmat[i] = qmat[i]; 169 | } 170 | 171 | 172 | 173 | 174 | { 175 | FILE *fid = fopen ("eigsubs.dbg", "a"); 176 | fprintf (fid, "matrix A\n"); 177 | for (i = 0; i < n; i++) { 178 | for (j = 0; j < n; j++) { 179 | fprintf (fid, "%8.4f ", amat[i * n + j]); 180 | } 181 | fprintf (fid, "\n"); 182 | } 183 | 184 | fprintf (fid, "matrix B\n"); 185 | for (i = 0; i < n; i++) { 186 | for (j = 0; j < n; j++) { 187 | fprintf (fid, "%8.4f ", bmat[i * n + j]); 188 | } 189 | fprintf (fid, "\n"); 190 | } 191 | fclose (fid); 192 | } 193 | 194 | 195 | /* matrices have to be symetric-definite, so don't 196 | * need to convert to column-major order */ 197 | geneigsolve_ (pmat, qmat, eval, &n); 198 | 199 | 200 | /* copy eigenvectors to A and original A,B back */ 201 | /* ith eigenvector should be in row i */ 202 | for (i = 0; i < n; i++) { 203 | for (j = 0; j < n; j++) { 204 | evec[i * n + j] = pmat[i * n + j]; /* don't put back in row-major order (?) */ 205 | } 206 | } 207 | for (i = 0; i < n * n; i++) { 208 | pmat[i] = amat[i]; 209 | qmat[i] = bmat[i]; 210 | } 211 | 212 | /* LAPACK puts evals and evecs in ascending order */ 213 | /* reorder evals and evecs so evals are in descending order */ 214 | 215 | for (i = 0; i < n / 2; i++) { 216 | double t = eval[i]; 217 | eval[i] = eval[n - 1 - i]; 218 | eval[n - 1 - i] = t; 219 | 220 | for (j = 0; j < n; j++) { /* exchange row i and row(n-1-i) */ 221 | t = evec[i * n + j]; 222 | evec[i * n + j] = evec[(n - 1 - i) * n + j]; 223 | evec[(n - 1 - i) * n + j] = t; 224 | } 225 | } 226 | 227 | free (amat); 228 | free (bmat); 229 | 230 | } 231 | -------------------------------------------------------------------------------- /src/nicksrc/xsearch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "xsearch.h" 6 | 7 | static ENTRY *xentry; 8 | static ENTRY **xxee; 9 | static int xxeenum = -1; 10 | static int xnum, xloaded; 11 | 12 | static int debug = NO; 13 | 14 | static int fancyhash = NO; 15 | 16 | 17 | /* ********************************************************************* */ 18 | 19 | void 20 | xhcreate (int n) 21 | { 22 | int t, i; 23 | if (xentry != NULL) { 24 | free (xentry); 25 | } 26 | if (n == 0) 27 | fatalx ("(xhcreate) zero length\n"); 28 | xnum = n; 29 | t = xnum % 17; 30 | if (t == 0) 31 | ++xnum; // for crude hash below 32 | ZALLOC (xentry, xnum, ENTRY); 33 | for (i = 0; i < xnum; i++) { 34 | xentry[i].key = NULL; 35 | } 36 | xloaded = 0; 37 | } 38 | 39 | void 40 | xhdestroy () 41 | { 42 | free (xentry); 43 | xentry = NULL; 44 | xnum = xloaded = 0; 45 | } 46 | 47 | ENTRY * 48 | xhsearch (ENTRY item, ACTION act) 49 | { 50 | 51 | ENTRY *itempt, *xtempt; 52 | int x; 53 | char *ccc; 54 | double yload; 55 | 56 | itempt = &item; 57 | ccc = itempt->key; 58 | x = xlookup (itempt->key, act); 59 | if (debug) 60 | printf ("lookup: %s %d\n", itempt->key, x); 61 | if ((x < 0) && (act == FIND)) 62 | return NULL; 63 | if ((x >= 0) && (act == FIND)) 64 | return xentry + x; 65 | if ((x < 0) && (act == ENTER)) 66 | fatalx ("duplicate key %s\n", itempt->key); 67 | 68 | xtempt = xentry + x; 69 | xtempt->key = itempt->key; 70 | xtempt->data = itempt->data; 71 | ++xloaded; 72 | yload = (double) xloaded / (double) xnum; 73 | if (yload > 0.9) 74 | fatalx ("excessive xsearch load\n"); 75 | return xtempt; 76 | } 77 | 78 | int 79 | xlookup (char *key, ACTION act) 80 | { 81 | ENTRY *xpt; 82 | int xbase, x, k; 83 | 84 | xbase = x = xhash (key); 85 | for (;;) { 86 | xpt = xentry + x; 87 | if (xpt->key == NULL) { 88 | if (act == FIND) 89 | return -1; 90 | return x; 91 | } 92 | k = strcmp (key, xpt->key); 93 | if (k == 0) { 94 | if (act == FIND) 95 | return x; 96 | return -1; 97 | } 98 | ++x; 99 | if (x >= xnum) 100 | x = 0; 101 | } 102 | } 103 | 104 | int 105 | xhash (char *key) 106 | { 107 | int t; 108 | t = stringhash (key); 109 | return abs (t) % xnum; 110 | } 111 | 112 | int 113 | stringhash (char *key) 114 | { 115 | 116 | #define MAXKEYLEN 512 117 | int xpack[MAXKEYLEN]; 118 | int len, wlen, w; 119 | unsigned char t; 120 | int thash = 7; 121 | int jmax, jmin; 122 | int i, j; 123 | 124 | if (key == NULL) 125 | return 13; 126 | len = strlen (key); 127 | if (len == 0) 128 | return 17; 129 | if (len >= MAXKEYLEN) 130 | fatalx ("key too long\n"); 131 | 132 | wlen = (len - 1) / 4; 133 | ++wlen; 134 | 135 | for (i = 0; i < wlen; ++i) { 136 | jmin = 4 * i; 137 | jmax = MIN (len - 1, jmin + 3); 138 | w = 0; 139 | for (j = jmin; j <= jmax; ++j) { 140 | t = (unsigned char) key[j]; 141 | w = (w << 8) ^ t; 142 | } 143 | xpack[i] = xcshift (w, i); 144 | } 145 | if (debug) 146 | printf ("zz %s %x %x\n", key, w, xpack[0]); 147 | for (i = 0; i < wlen; i++) { 148 | thash += xhash1 (xpack[i]); 149 | if (debug) 150 | printf ("zz2 %x\n", thash); 151 | thash = xcshift (thash, 3); 152 | } 153 | if (debug) 154 | printf ("key: %s hash: %x\n", key, thash); 155 | 156 | return thash; 157 | 158 | 159 | } 160 | 161 | int 162 | xhash1 (int ww) 163 | { 164 | 165 | int k, w, w1, w2; 166 | w = xcshift (ww, 17); 167 | if (fancyhash == NO) 168 | return 17 * w; 169 | for (k = 0; k < 3; ++k) { 170 | w1 = w >> 16; 171 | w2 = w << 16; 172 | w = w2 ^ xhash2 (w1) ^ (w2 >> 16); 173 | } 174 | return w; 175 | } 176 | 177 | int 178 | xhash2 (int x) 179 | { 180 | 181 | int xmax = 65535; 182 | int t; 183 | 184 | if (x == 0) 185 | return xmax; 186 | if (x == xmax) 187 | return 0; 188 | 189 | t = x * 11; 190 | return t % xmax; 191 | 192 | } 193 | 194 | int 195 | xcshift (int x, int shft) 196 | { 197 | int a, b; 198 | 199 | if (shft == 0) 200 | return x; 201 | a = x << shft; 202 | b = x >> (32 - shft); 203 | 204 | return a ^ b; 205 | 206 | } 207 | 208 | void 209 | xdestroy () 210 | { 211 | int i, num; 212 | ENTRY *pitem; 213 | 214 | if (xxee == NULL) 215 | return; 216 | num = xxeenum; 217 | for (i = 0; i < num; i++) { 218 | pitem = xxee[i]; 219 | if (pitem == NULL) 220 | continue; 221 | free (pitem->key); 222 | free (pitem->data); 223 | free (pitem); 224 | } 225 | free (xxee); 226 | xhdestroy (); 227 | } 228 | 229 | int 230 | xloadsearchx (char **ss, int n) 231 | { 232 | 233 | ENTRY item, *pitem; 234 | char xx[8]; 235 | int i, t; 236 | 237 | xhcreate (2 * n); 238 | ZALLOC (xxee, n, ENTRY *); 239 | xxeenum = n; 240 | for (i = 0; i < n; i++) { 241 | t = xlookup (ss[i], FIND); 242 | if (t >= 0) 243 | return i; 244 | ZALLOC (xxee[i], 1, ENTRY); 245 | pitem = xxee[i]; 246 | pitem->key = strdup (ss[i]); 247 | sprintf (xx, "%d", i); 248 | pitem->data = strdup (xx); 249 | xhsearch (*pitem, ENTER); 250 | } 251 | return -1; 252 | } 253 | 254 | void 255 | xloadsearch (char **ss, int n) 256 | { 257 | 258 | ENTRY item, *pitem; 259 | char xx[8]; 260 | int i; 261 | 262 | xhcreate (2 * n); 263 | ZALLOC (xxee, n, ENTRY *); 264 | xxeenum = n; 265 | for (i = 0; i < n; i++) { 266 | ZALLOC (xxee[i], 1, ENTRY); 267 | pitem = xxee[i]; 268 | pitem->key = strdup (ss[i]); 269 | sprintf (xx, "%d", i); 270 | pitem->data = strdup (xx); 271 | xhsearch (*pitem, ENTER); 272 | } 273 | } 274 | 275 | int 276 | xfindit (char *ss) 277 | { 278 | 279 | ENTRY item, *pitem; 280 | int k; 281 | 282 | item.key = ss; 283 | pitem = xhsearch (item, FIND); 284 | if (pitem == NULL) 285 | return -1; 286 | sscanf (pitem->data, "%d", &k); 287 | return k; 288 | 289 | } 290 | 291 | int 292 | finddup (char **ss, int n) 293 | { 294 | 295 | int t; 296 | 297 | t = xloadsearchx (ss, n); 298 | xdestroy (); 299 | return t; 300 | 301 | } 302 | -------------------------------------------------------------------------------- /src/nicksrc/twtable.c: -------------------------------------------------------------------------------- 1 | /** 2 | 3 | * @file twtable.c 4 | 5 | * @brief Tracy-Widom distribution 6 | 7 | */ 8 | 9 | 10 | const double TWXVAL[] = { 11 | -8.0, -7.9, -7.8, -7.7, -7.6, -7.5, -7.4, -7.3, -7.2, -7.1, 12 | -7.0, -6.9, 13 | -6.8, -6.7, -6.6, -6.5, -6.4, -6.3, -6.2, -6.1, 14 | -6.0, -5.9, -5.8, -5.7, -5.6, -5.5, -5.4, -5.3, 15 | -5.2, -5.1, 16 | -5.0, -4.9, -4.8, -4.7, -4.6, -4.5, -4.4, -4.3, -4.2, -4.1, 17 | -4.0, -3.9, -3.8, -3.7, 18 | -3.6, -3.5, -3.4, -3.3, -3.2, -3.1, 19 | -3.0, -2.9, -2.8, -2.7, -2.6, -2.5, -2.4, -2.3, -2.2, -2.1, 20 | 21 | -2.0, -1.9, -1.8, -1.7, -1.6, -1.5, -1.4, -1.3, -1.2, -1.1, 22 | -1.0, -0.9, -0.8, -0.7, -0.6, -0.5, 23 | -0.4, -0.3, -0.2, -0.1, 24 | 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 25 | 1.0, 1.1, 1.2, 1.3, 26 | 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 27 | 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 28 | 3.0, 3.1, 3.2, 29 | 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 30 | 4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 31 | 5.0, 5.1, 32 | 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 33 | 6.0, 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 34 | 7.0, 35 | 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 36 | 8.0 37 | }; 38 | 39 | const double TWXTAIL[] = { 40 | 1.000000000, 1.000000000, 1.000000000, 1.000000000, 1.000000000, 41 | 42 | 1.000000000, 1.000000000, 0.999999999, 0.999999999, 0.999999997, 43 | 0.999999995, 0.999999989, 44 | 0.999999978, 0.999999958, 0.999999920, 45 | 0.999999849, 0.999999723, 0.999999498, 0.999999105, 46 | 0.999998431, 47 | 0.999997293, 0.999995401, 0.999992309, 0.999987331, 0.999979441, 48 | 0.999967125, 49 | 0.999948187, 0.999919496, 0.999876655, 0.999813597, 50 | 0.999722082, 0.999591101, 0.999406175, 51 | 0.999148569, 0.998794427, 52 | 0.998313849, 0.997669962, 0.996818016, 0.995704571, 0.994266851, 53 | 54 | 0.992432322, 0.990118582, 0.987233631, 0.983676579, 0.979338843, 55 | 0.974105853, 0.967859270, 56 | 0.960479677, 0.951849687, 0.941857369, 57 | 0.930399881, 0.917387157, 0.902745495, 0.886420892, 58 | 0.868381957, 59 | 0.848622271, 0.827162053, 0.804049066, 0.779358684, 0.753193114, 60 | 0.725679802, 61 | 0.696969061, 0.667231036, 0.636652122, 0.605430961, 62 | 0.573774198, 0.541892124, 0.509994383, 63 | 0.478285870, 0.446962951, 64 | 0.416210105, 0.386197065, 0.357076521, 0.328982392, 0.302028689, 65 | 66 | 0.276308949, 0.251896179, 0.228843301, 0.207183986, 0.186933854, 67 | 0.168091934, 0.150642330, 68 | 0.134556018, 0.119792709, 0.106302721, 69 | 0.094028817, 0.082907953, 0.072872924, 0.063853860, 70 | 0.055779577, 71 | 0.048578763, 0.042180992, 0.036517582, 0.031522284, 0.027131832, 72 | 0.023286351, 73 | 0.019929640, 0.017009350, 0.014477062, 0.012288293, 74 | 0.010402429, 0.008782605, 0.007395547, 75 | 0.006211384, 0.005203434, 76 | 0.004347977, 0.003624031, 0.003013114, 0.002499018, 0.002067590, 77 | 78 | 0.001706520, 0.001405143, 0.001154255, 0.000945945, 0.000773431, 79 | 0.000630927, 0.000513508, 80 | 0.000416999, 0.000337871, 0.000273152, 81 | 0.000220344, 0.000177359, 0.000142452, 0.000114170, 82 | 0.000091308, 83 | 0.000072871, 0.000058035, 0.000046124, 0.000036582, 0.000028955, 84 | 0.000022872, 85 | 0.000018030, 0.000014185, 0.000011138, 0.000008728, 86 | 0.000006826, 0.000005328, 0.000004151, 87 | 0.000003228, 0.000002505, 88 | 0.000001941, 0.000001501, 0.000001158, 0.000000892, 0.000000686, 89 | 90 | 0.000000527, 0.000000403, 0.000000308, 0.000000235, 0.000000179, 91 | 0.000000136, 0.000000104, 92 | 0.000000079, 0.000000059, 0.000000045, 93 | 0.000000034, 0.000000025, 0.000000019, 0.000000014, 94 | 0.000000011, 95 | 0.000000008 96 | }; 97 | 98 | 99 | 100 | const double TWXPDF[] = { 101 | 0.000000000, 0.000000000, 0.000000000, 0.000000000, 0.000000000, 102 | 103 | 0.000000001, 0.000000002, 0.000000005, 0.000000010, 0.000000019, 104 | 0.000000039, 0.000000076, 105 | 0.000000146, 0.000000276, 0.000000511, 106 | 0.000000932, 0.000001670, 0.000002942, 0.000005097, 107 | 0.000008683, 108 | 0.000014554, 0.000024005, 0.000038969, 0.000062279, 0.000098012, 109 | 0.000151923, 110 | 0.000231995, 0.000349097, 0.000517756, 0.000757035, 111 | 0.001091485, 0.001552137, 0.002177466, 112 | 0.003014256, 0.004118267, 113 | 0.005554591, 0.007397591, 0.009730295, 0.012643159, 0.016232112, 114 | 115 | 0.020595851, 0.025832397, 0.032034971, 0.039287325, 0.047658716, 116 | 0.057198759, 0.067932445, 117 | 0.079855636, 0.092931337, 0.107087044, 118 | 0.122213418, 0.138164458, 0.154759279, 0.171785501, 119 | 0.189004169, 120 | 0.206156009, 0.222968755, 0.239165233, 0.254471803, 0.268626779, 121 | 0.281388431, 122 | 0.292542221, 0.301906945, 0.309339558, 0.314738516, 123 | 0.318045543, 0.319245849, 0.318366852, 124 | 0.315475570, 0.310674866, 125 | 0.304098784, 0.295907232, 0.286280263, 0.275412215, 0.263505933, 126 | 127 | 0.250767272, 0.237400053, 0.223601597, 0.209558915, 0.195445624, 128 | 0.181419571, 0.167621190, 129 | 0.154172511, 0.141176787, 0.128718659, 130 | 0.116864772, 0.105664756, 0.095152500, 0.085347620, 131 | 0.076257058, 132 | 0.067876743, 0.060193257, 0.053185457, 0.046826015, 0.041082856, 133 | 0.035920459, 134 | 0.031301023, 0.027185487, 0.023534398, 0.020308645, 135 | 0.017470054, 0.014981856, 0.012809046, 136 | 0.010918644, 0.009279861, 137 | 0.007864200, 0.006645482, 0.005599836, 0.004705636, 0.003943413, 138 | 139 | 0.003295741, 0.002747112, 0.002283795, 0.001893694, 0.001566204, 140 | 0.001292071, 0.001063253, 141 | 0.000872795, 0.000714702, 0.000583831, 142 | 0.000475784, 0.000386816, 0.000313749, 0.000253894, 143 | 0.000204987, 144 | 0.000165125, 0.000132716, 0.000106431, 0.000085163, 0.000067996, 145 | 0.000054172, 146 | 0.000043066, 0.000034164, 0.000027045, 0.000021365, 147 | 0.000016843, 0.000013250, 0.000010403, 148 | 0.000008151, 0.000006374, 149 | 0.000004974, 0.000003874, 0.000003011, 0.000002336, 0.000001809, 150 | 151 | 0.000001398, 0.000001078, 0.000000830, 0.000000638, 0.000000489, 152 | 0.000000375, 0.000000286, 153 | 0.000000218, 0.000000166, 0.000000126, 154 | 0.000000096, 0.000000073, 0.000000055, 0.000000041, 155 | 0.000000031, 156 | 0.000000023 157 | }; 158 | 159 | 160 | 161 | const int TWTABSIZE = 161; 162 | -------------------------------------------------------------------------------- /src/twsubs.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include "admutils.h" 7 | #include "eigsubs.h" 8 | 9 | /* ********************************************************************* */ 10 | 11 | extern int verbose; 12 | 13 | int twl2mode = YES; 14 | int mval = -1; 15 | int nval = -1; 16 | int numsamp = 100; 17 | double mul1 = 1.0; 18 | 19 | 20 | double xxlike (int m, double a, double var, double logsum, double lsum); 21 | double xxlikex (int m, double a, double logsum, double lsum); 22 | double xxliked (int m, double a, double logsum, double lsum); 23 | double xxliked2 (int m, double a, double logsum, double lsum); 24 | double oldtwestxx (double *lam, int m, double *pzn, double *pzvar); 25 | double doeig2 (double *vals, int m, double *pzn, double *ptw); 26 | 27 | double 28 | twestxx (double *lam, int m, double *pzn, double *pzvar) 29 | { 30 | double tw, y; 31 | 32 | if (twl2mode == NO) 33 | return oldtwestxx (lam, m, pzn, pzvar); 34 | (void) doeig2 (lam, m, pzn, &tw); 35 | 36 | y = (*pzn) * (double) m; 37 | *pzvar = asum (lam, m) / y; 38 | return tw; 39 | 40 | } 41 | 42 | double 43 | oldtwestxx (double *lam, int m, double *pzn, double *pzvar) 44 | { 45 | double lsum, logsum; 46 | double *ww; 47 | double a, p, yn, var; 48 | double ylike, ybase, y, ylmax, ynmax, yld, yld2, ainc, ym; 49 | int k; 50 | 51 | 52 | ZALLOC (ww, m, double); 53 | copyarr (lam, ww, m); 54 | lsum = asum (ww, m); 55 | vlog (ww, ww, m); 56 | logsum = asum (ww, m); 57 | 58 | ylmax = -1.0e20; 59 | yn = (double) m; 60 | ybase = xxlikex (m, yn, logsum, lsum); 61 | 62 | for (k = 1; k <= 100; ++k) { 63 | a = yn / 2.0; 64 | ylike = xxlikex (m, a, logsum, lsum); 65 | yld = xxliked (m, a, logsum, lsum); 66 | ylike -= ybase; 67 | if (verbose) 68 | printf ("ynloop %12.3f %12.3f %12.3f\n", yn / (double) m, ylike, yld); 69 | if (ylike < ylmax) 70 | break; 71 | ylmax = ylike; 72 | ynmax = yn; 73 | yn *= 1.1; 74 | } 75 | a = ynmax / 2.0; 76 | for (k = 1; k <= 10; ++k) { 77 | // newton iteration 78 | ylike = xxlikex (m, a, logsum, lsum); 79 | yld = xxliked (m, a, logsum, lsum); 80 | yld2 = xxliked2 (m, a, logsum, lsum); 81 | ylike -= ybase; 82 | ainc = -yld / yld2; 83 | a += ainc; 84 | if (verbose) 85 | printf ("newton: %3d %15.9f %15.9f %15.9f\n", k, ylike, yld, ainc); 86 | } 87 | fflush (stdout); 88 | yn = 2.0 * a; 89 | ym = (double) m; 90 | var = lsum / (2.0 * a * ym); 91 | 92 | *pzn = yn; 93 | *pzvar = var; 94 | 95 | free (ww); 96 | return 0; 97 | } 98 | 99 | double 100 | xxlike (int m, double a, double var, double logsum, double lsum) 101 | { 102 | double p, yl = 0.0; 103 | double ym, x; 104 | int j; 105 | 106 | p = 0.5 * (double) (m + 1); 107 | ym = (double) m; 108 | 109 | yl = -ym * a * log (2.0); 110 | for (j = 1; j <= m; ++j) { 111 | x = a - 0.5 * (double) (m - j); 112 | yl -= lgamma (x); 113 | } 114 | // so far this is log (C_L) normalizing constant 115 | yl -= ym * a * log (var); 116 | yl += (a - p) * logsum; 117 | yl -= lsum / (2.0 * var); 118 | 119 | return yl; 120 | 121 | } 122 | 123 | double 124 | xxlikex (int m, double a, double logsum, double lsum) 125 | { 126 | double p, yl = 0.0; 127 | double ym, x, var, lco; 128 | int j; 129 | 130 | p = 0.5 * (double) (m + 1); 131 | ym = (double) m; 132 | lco = lsum / (2.0 * ym); 133 | var = lco / a; 134 | 135 | 136 | yl = -ym * a * log (2.0); 137 | for (j = 1; j <= m; ++j) { 138 | x = a - 0.5 * (double) (m - j); 139 | yl -= lgamma (x); 140 | } 141 | // so far this is log (C_L) normalizing constant 142 | yl -= ym * a * log (var); 143 | yl += (a - p) * logsum; 144 | yl -= ym * a; // plugging in var 145 | 146 | return yl; 147 | 148 | } 149 | 150 | double 151 | xxliked (int m, double a, double logsum, double lsum) 152 | // first deriv wrt a 153 | { 154 | double p, yl = 0.0; 155 | double ym, x, var, vard; 156 | int j; 157 | 158 | p = 0.5 * (double) (m + 1); 159 | ym = (double) m; 160 | var = lsum / (2.0 * a * ym); 161 | vard = -var / a; 162 | 163 | 164 | yl = -ym * log (2.0); 165 | for (j = 1; j <= m; ++j) { 166 | x = a - 0.5 * (double) (m - j); 167 | if (x < 0.0) 168 | return 100.0; 169 | yl -= psi (x); 170 | } 171 | // so far this is log (C_L) normalizing constant 172 | yl -= ym * log (var); 173 | yl -= (ym * a / var) * vard; 174 | yl += logsum; 175 | yl -= ym; // plugging in var 176 | 177 | return yl; 178 | 179 | } 180 | 181 | double 182 | xxliked2 (int m, double a, double logsum, double lsum) 183 | // second deriv wrt a 184 | { 185 | double p, yl = 0.0; 186 | double ym, x, var, vard, vard2, y; 187 | int j; 188 | 189 | p = 0.5 * (double) (m + 1); 190 | ym = (double) m; 191 | var = lsum / (2.0 * a * ym); 192 | vard = -var / a; 193 | vard2 = 2.0 * var / (a * a); 194 | 195 | 196 | yl = 0.0; 197 | for (j = 1; j <= m; ++j) { 198 | x = a - 0.5 * (double) (m - j); 199 | if (x < 0.0) 200 | return 100.0; 201 | yl -= tau (x); 202 | } 203 | // so far this is log (C_L) normalizing constant 204 | yl -= 2.0 * (ym / var) * vard; 205 | yl -= (ym * a / var) * vard2; 206 | y = vard / var; 207 | yl += (ym * a) * y * y; 208 | 209 | return yl; 210 | 211 | } 212 | 213 | double 214 | doeig2 (double *vals, int m, double *pzn, double *ptw) 215 | { 216 | static int ncall = 0; 217 | double y, tw, tail; 218 | double zn, zvar, top, bot; 219 | double *evals; 220 | 221 | ++ncall; 222 | ZALLOC (evals, m, double); 223 | copyarr (vals, evals, m); 224 | y = (double) m / asum (evals, m); 225 | vst (evals, evals, y, m); 226 | top = (double) (m * (m + 2)); 227 | bot = asum2 (evals, m) - (double) m; 228 | zn = top / bot; 229 | y = evals[0] * zn; 230 | tw = twnorm (y, (double) m, zn); 231 | tail = twtail (tw); 232 | free (evals); 233 | *pzn = zn; 234 | *ptw = tw; 235 | return tail; 236 | } 237 | 238 | double 239 | rhoinv (double x, double gam) 240 | // Lee et al. page 5 for \rho^{-1} 241 | { 242 | double y1, y2; 243 | 244 | y1 = x + 1.0 - gam; 245 | y2 = y1 * y1 - 4.0 * x; 246 | if (y2 <= 0.0) 247 | return -1.0; 248 | 249 | y1 += sqrt (y2); 250 | 251 | return 0.5 * y1; 252 | 253 | } 254 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ## shrinkmode added. 3/15 2 | EIGENSOFT version 7.2.1, 06/30/17 (for Linux only) 3 | 4 | The EIGENSOFT package implements methods from the following 2 papers: 5 | Patterson et al. 2006 PLoS Genet 2:e190 (population structure) 6 | Price et al. 2006 Nat Genet 38:904-9 (EIGENSTRAT stratification correction) 7 | 8 | NEW features of EIGENSOFT version 7.2.0 9 | -- shrinkmode 10 | 11 | NEW features of EIGENSOFT version 6.1.4 include: 12 | -- pcaselection was omitted from 6.1.3 by accident 13 | -- Statically linked GSL/openblas 14 | -- Fixed memory allocation bug in pcaselection 15 | -- Some routines moved into nicklib 16 | -- Error message on allocate failure now prints length as "%ld" 17 | supporting long values. 18 | 19 | NEW features of EIGENSOFT version 6.1.3 include: 20 | -- Restored script file extensions to .perl instead of .pl 21 | -- Added updated ploteig script that disappeared from the repository 22 | 23 | NEW features of EIGENSOFT version 6.1.2 include: 24 | -- Updated license info to be GPL compliant required by linking the GSL 25 | 26 | NEW features of EIGENSOFT version 6.1.1 include: 27 | -- Minor bug fix to correctly merge version 6.0.2 and version 6.1 changes. 28 | -- pcaselection operates on evec files. Added examples. 29 | -- Backported twtable.c/h from EIGENSOFT 7alpha 30 | 31 | NEW features of EIGENSOFT version 6.1 include: 32 | -- The range finding step of PCA fastmode only scales the multiplied matrix, 33 | as orthogonalization is unnecessary. This appears to improve accuracy. 34 | 35 | NEW features of EIGENSOFT version 6.0.2 include: 36 | -- Fixed Makefile and documentation to build eigenstrat properly 37 | -- Moved Tracy-Widom table into a header file for easier building 38 | 39 | NEW features of EIGENSOFT version 6.0.1 include: 40 | -- Minor bug fix which prevents smartpca from trying to print out eigenvalues 41 | if fastmode is set. 42 | 43 | NEW features of EIGENSOFT version 6.0.0beta included: 44 | -- New option fastmode which implements a very fast pca approximation. 45 | See POPGEN/README and Galinsky 2014 ASHG talk. 46 | -- Changes to external packages required. EIGENSOFT version 5.0.2 required 47 | lapack + blas. On the other hand, EIGENSOFT version 6.0beta requires 48 | GSL + lapack + OpenBLAS (but does not require the native version of blas). 49 | The Makefile has been changed accordingly. 50 | -- EIGENSOFT version 6.0beta supports multi-threading. See POPGEN/README. 51 | -- Bug fix for ldregress option. 52 | 53 | See CONVERTF/README for documentation of programs for converting file formats. 54 | See POPGEN/README for documentation of population structure programs. 55 | See EIGENSTRAT/README for documentation of EIGENSTRAT programs. 56 | 57 | Questions? 58 | See https://www.hsph.harvard.edu/alkes-price/eigensoft-frequently-asked-questions/ 59 | https://github.com/DReichLab/EIG 60 | 61 | For questions about building this software: 62 | Matthew Mah 63 | 64 | For questions about smartpca: 65 | Nick Patterson 66 | 67 | For questions about eigenstrat: 68 | Alkes Price 69 | 70 | Executables and source code: 71 | ---------------------------- 72 | All C executables are in the bin/ directory. 73 | 74 | We have placed source code for all C executables in the src/ directory, 75 | for users who wish to modify and recompile our programs. 76 | "cd src" 77 | "make" 78 | "make install" 79 | 80 | Note that some of our software will only compile if your system has the 81 | GSL + lapack + OpenBLAS packages installed. 82 | 83 | On Mac OSX, you can install gsl and OpenBLAS with lapack using homebrew: 84 | "brew install gsl" 85 | "brew install openblas" 86 | 87 | If these packages are not in standard directories, you can specify the 88 | appropriate include and library directories with the CFLAGS and LDFLAGS 89 | make variables. 90 | For example, on the Harvard Medical School O2 cluster, the command is: 91 | 'make CFLAGS="-I/n/app/openblas/0.2.19/include -I/n/app/gsl/2.3/include" LDFLAGS="-L/n/app/openblas/0.2.19/lib -L/n/app/gsl/2.3/lib/"' 92 | On Mac OSX: 93 | 'make CFLAGS="-I/usr/local/opt/openblas/include -I/usr/local/opt/gsl/include" LDFLAGS="-L/usr/local/opt/openblas/lib -L/usr/local/opt/gsl/lib"' 94 | 95 | If you have issues with missing lapacke symbols, for example "undefined reference to `LAPACKE_dlange'", run make with the corresponding additional libraries linked: 96 | 'make LDLIBS="-llapacke"' 97 | This has been encountered on Linux Mint 18. 98 | 99 | If you have trouble compiling and running our code, try compiling and 100 | running the pcatoy program in the src directory: 101 | "cd src" 102 | "make pcatoy" 103 | "./pcatoy" 104 | If you are unable to run the pcatoy program successfully, please contact 105 | your system administrator for help, as this is a systems issue which is 106 | beyond our scope. Your system administrator will be able to troubleshoot 107 | your systems issue using this trivial program. [You can also try running 108 | the pcatoy program in the bin directory, which we have already compiled.] 109 | 110 | To remake the entire package: 111 | "cd src" 112 | "make clobber" 113 | "make install" 114 | 115 | To remake EIG7.2 it is necessary to link to the OpenBLAS library. On orchestra, 116 | the path is /opt/openblas and should work automatically. On Broad institute machines, 117 | the user should execute "use .openblas-0.2.8" and "use GCC-4.9" at the command 118 | prompt before attempting to remake. All other users should install OpenBLAS and 119 | set the variable OPENBLAS to the path at the make command line, 120 | e.g. "make install OPENBLAS=/usr/local/openblas" 121 | 122 | ---------------------------- 123 | Acknowledgements: 124 | EIGENSOFT was written by Nick Patterson, Alkes Price, Samuela Pollack, 125 | Kevin Galinsky, Chris Chang, and Sasha Gusev. 126 | 127 | We thank John Novembre and Mike Boursnell for code improvements, Matt Hanna 128 | for the first implementation of multi-threading, and Angela Yu for a bugfix. 129 | 130 | ---------------------------- 131 | SOFTWARE COPYRIGHT NOTICE AGREEMENT 132 | This software and its documentation are copyright (2010) by Harvard University 133 | and The Broad Institute. All rights are reserved. This software is supplied 134 | without any warranty or guaranteed support whatsoever. Neither Harvard 135 | University nor The Broad Institute can be responsible for its use, misuse, or 136 | functionality. The software may be freely copied for non-commercial purposes, 137 | provided this copyright notice is retained. 138 | 139 | -------------------------------------------------------------------------------- /POPGEN/twtable: -------------------------------------------------------------------------------- 1 | ### this is a table of TW using a Runge-Kutta solver suggested by Per-Olaf Persson 2 | ### algorithm coded in C by NP using NAG ODE solver 3 | -8.000 1.000000000 0.000000000 4 | -7.900 1.000000000 0.000000000 5 | -7.800 1.000000000 0.000000000 6 | -7.700 1.000000000 0.000000000 7 | -7.600 1.000000000 0.000000000 8 | -7.500 1.000000000 0.000000001 9 | -7.400 1.000000000 0.000000002 10 | -7.300 0.999999999 0.000000005 11 | -7.200 0.999999999 0.000000010 12 | -7.100 0.999999997 0.000000019 13 | -7.000 0.999999995 0.000000039 14 | -6.900 0.999999989 0.000000076 15 | -6.800 0.999999978 0.000000146 16 | -6.700 0.999999958 0.000000276 17 | -6.600 0.999999920 0.000000511 18 | -6.500 0.999999849 0.000000932 19 | -6.400 0.999999723 0.000001670 20 | -6.300 0.999999498 0.000002942 21 | -6.200 0.999999105 0.000005097 22 | -6.100 0.999998431 0.000008683 23 | -6.000 0.999997293 0.000014554 24 | -5.900 0.999995401 0.000024005 25 | -5.800 0.999992309 0.000038969 26 | -5.700 0.999987331 0.000062279 27 | -5.600 0.999979441 0.000098012 28 | -5.500 0.999967125 0.000151923 29 | -5.400 0.999948187 0.000231995 30 | -5.300 0.999919496 0.000349097 31 | -5.200 0.999876655 0.000517756 32 | -5.100 0.999813597 0.000757035 33 | -5.000 0.999722082 0.001091485 34 | -4.900 0.999591101 0.001552137 35 | -4.800 0.999406175 0.002177466 36 | -4.700 0.999148569 0.003014256 37 | -4.600 0.998794427 0.004118267 38 | -4.500 0.998313849 0.005554591 39 | -4.400 0.997669962 0.007397591 40 | -4.300 0.996818016 0.009730295 41 | -4.200 0.995704571 0.012643159 42 | -4.100 0.994266851 0.016232112 43 | -4.000 0.992432322 0.020595851 44 | -3.900 0.990118582 0.025832397 45 | -3.800 0.987233631 0.032034971 46 | -3.700 0.983676579 0.039287325 47 | -3.600 0.979338843 0.047658716 48 | -3.500 0.974105853 0.057198759 49 | -3.400 0.967859270 0.067932445 50 | -3.300 0.960479677 0.079855636 51 | -3.200 0.951849687 0.092931337 52 | -3.100 0.941857369 0.107087044 53 | -3.000 0.930399881 0.122213418 54 | -2.900 0.917387157 0.138164458 55 | -2.800 0.902745495 0.154759279 56 | -2.700 0.886420892 0.171785501 57 | -2.600 0.868381957 0.189004169 58 | -2.500 0.848622271 0.206156009 59 | -2.400 0.827162053 0.222968755 60 | -2.300 0.804049066 0.239165233 61 | -2.200 0.779358684 0.254471803 62 | -2.100 0.753193114 0.268626779 63 | -2.000 0.725679802 0.281388431 64 | -1.900 0.696969061 0.292542221 65 | -1.800 0.667231036 0.301906945 66 | -1.700 0.636652122 0.309339558 67 | -1.600 0.605430961 0.314738516 68 | -1.500 0.573774198 0.318045543 69 | -1.400 0.541892124 0.319245849 70 | -1.300 0.509994383 0.318366852 71 | -1.200 0.478285870 0.315475570 72 | -1.100 0.446962951 0.310674866 73 | -1.000 0.416210105 0.304098784 74 | -0.900 0.386197065 0.295907232 75 | -0.800 0.357076521 0.286280263 76 | -0.700 0.328982392 0.275412215 77 | -0.600 0.302028689 0.263505933 78 | -0.500 0.276308949 0.250767272 79 | -0.400 0.251896179 0.237400053 80 | -0.300 0.228843301 0.223601597 81 | -0.200 0.207183986 0.209558915 82 | -0.100 0.186933854 0.195445624 83 | 0.000 0.168091934 0.181419571 84 | 0.100 0.150642330 0.167621190 85 | 0.200 0.134556018 0.154172511 86 | 0.300 0.119792709 0.141176787 87 | 0.400 0.106302721 0.128718659 88 | 0.500 0.094028817 0.116864772 89 | 0.600 0.082907953 0.105664756 90 | 0.700 0.072872924 0.095152500 91 | 0.800 0.063853860 0.085347620 92 | 0.900 0.055779577 0.076257058 93 | 1.000 0.048578763 0.067876743 94 | 1.100 0.042180992 0.060193257 95 | 1.200 0.036517582 0.053185457 96 | 1.300 0.031522284 0.046826015 97 | 1.400 0.027131832 0.041082856 98 | 1.500 0.023286351 0.035920459 99 | 1.600 0.019929640 0.031301023 100 | 1.700 0.017009350 0.027185487 101 | 1.800 0.014477062 0.023534398 102 | 1.900 0.012288293 0.020308645 103 | 2.000 0.010402429 0.017470054 104 | 2.100 0.008782605 0.014981856 105 | 2.200 0.007395547 0.012809046 106 | 2.300 0.006211384 0.010918644 107 | 2.400 0.005203434 0.009279861 108 | 2.500 0.004347977 0.007864200 109 | 2.600 0.003624031 0.006645482 110 | 2.700 0.003013114 0.005599836 111 | 2.800 0.002499018 0.004705636 112 | 2.900 0.002067590 0.003943413 113 | 3.000 0.001706520 0.003295741 114 | 3.100 0.001405143 0.002747112 115 | 3.200 0.001154255 0.002283795 116 | 3.300 0.000945945 0.001893694 117 | 3.400 0.000773431 0.001566204 118 | 3.500 0.000630927 0.001292071 119 | 3.600 0.000513508 0.001063253 120 | 3.700 0.000416999 0.000872795 121 | 3.800 0.000337871 0.000714702 122 | 3.900 0.000273152 0.000583831 123 | 4.000 0.000220344 0.000475784 124 | 4.100 0.000177359 0.000386816 125 | 4.200 0.000142452 0.000313749 126 | 4.300 0.000114170 0.000253894 127 | 4.400 0.000091308 0.000204987 128 | 4.500 0.000072871 0.000165125 129 | 4.600 0.000058035 0.000132716 130 | 4.700 0.000046124 0.000106431 131 | 4.800 0.000036582 0.000085163 132 | 4.900 0.000028955 0.000067996 133 | 5.000 0.000022872 0.000054172 134 | 5.100 0.000018030 0.000043066 135 | 5.200 0.000014185 0.000034164 136 | 5.300 0.000011138 0.000027045 137 | 5.400 0.000008728 0.000021365 138 | 5.500 0.000006826 0.000016843 139 | 5.600 0.000005328 0.000013250 140 | 5.700 0.000004151 0.000010403 141 | 5.800 0.000003228 0.000008151 142 | 5.900 0.000002505 0.000006374 143 | 6.000 0.000001941 0.000004974 144 | 6.100 0.000001501 0.000003874 145 | 6.200 0.000001158 0.000003011 146 | 6.300 0.000000892 0.000002336 147 | 6.400 0.000000686 0.000001809 148 | 6.500 0.000000527 0.000001398 149 | 6.600 0.000000403 0.000001078 150 | 6.700 0.000000308 0.000000830 151 | 6.800 0.000000235 0.000000638 152 | 6.900 0.000000179 0.000000489 153 | 7.000 0.000000136 0.000000375 154 | 7.100 0.000000104 0.000000286 155 | 7.200 0.000000079 0.000000218 156 | 7.300 0.000000059 0.000000166 157 | 7.400 0.000000045 0.000000126 158 | 7.500 0.000000034 0.000000096 159 | 7.600 0.000000025 0.000000073 160 | 7.700 0.000000019 0.000000055 161 | 7.800 0.000000014 0.000000041 162 | 7.900 0.000000011 0.000000031 163 | 8.000 0.000000008 0.000000023 164 | -------------------------------------------------------------------------------- /src/smarttables/twtable: -------------------------------------------------------------------------------- 1 | ### this is a table of TW using a Runge-Kutta solver suggested by Per-Olaf Persson 2 | ### algorithm coded in C by NP using NAG ODE solver 3 | ### arg r tail pdf 4 | -8.000 1.000000000 0.000000000 5 | -7.900 1.000000000 0.000000000 6 | -7.800 1.000000000 0.000000000 7 | -7.700 1.000000000 0.000000000 8 | -7.600 1.000000000 0.000000000 9 | -7.500 1.000000000 0.000000001 10 | -7.400 1.000000000 0.000000002 11 | -7.300 0.999999999 0.000000005 12 | -7.200 0.999999999 0.000000010 13 | -7.100 0.999999997 0.000000019 14 | -7.000 0.999999995 0.000000039 15 | -6.900 0.999999989 0.000000076 16 | -6.800 0.999999978 0.000000146 17 | -6.700 0.999999958 0.000000276 18 | -6.600 0.999999920 0.000000511 19 | -6.500 0.999999849 0.000000932 20 | -6.400 0.999999723 0.000001670 21 | -6.300 0.999999498 0.000002942 22 | -6.200 0.999999105 0.000005097 23 | -6.100 0.999998431 0.000008683 24 | -6.000 0.999997293 0.000014554 25 | -5.900 0.999995401 0.000024005 26 | -5.800 0.999992309 0.000038969 27 | -5.700 0.999987331 0.000062279 28 | -5.600 0.999979441 0.000098012 29 | -5.500 0.999967125 0.000151923 30 | -5.400 0.999948187 0.000231995 31 | -5.300 0.999919496 0.000349097 32 | -5.200 0.999876655 0.000517756 33 | -5.100 0.999813597 0.000757035 34 | -5.000 0.999722082 0.001091485 35 | -4.900 0.999591101 0.001552137 36 | -4.800 0.999406175 0.002177466 37 | -4.700 0.999148569 0.003014256 38 | -4.600 0.998794427 0.004118267 39 | -4.500 0.998313849 0.005554591 40 | -4.400 0.997669962 0.007397591 41 | -4.300 0.996818016 0.009730295 42 | -4.200 0.995704571 0.012643159 43 | -4.100 0.994266851 0.016232112 44 | -4.000 0.992432322 0.020595851 45 | -3.900 0.990118582 0.025832397 46 | -3.800 0.987233631 0.032034971 47 | -3.700 0.983676579 0.039287325 48 | -3.600 0.979338843 0.047658716 49 | -3.500 0.974105853 0.057198759 50 | -3.400 0.967859270 0.067932445 51 | -3.300 0.960479677 0.079855636 52 | -3.200 0.951849687 0.092931337 53 | -3.100 0.941857369 0.107087044 54 | -3.000 0.930399881 0.122213418 55 | -2.900 0.917387157 0.138164458 56 | -2.800 0.902745495 0.154759279 57 | -2.700 0.886420892 0.171785501 58 | -2.600 0.868381957 0.189004169 59 | -2.500 0.848622271 0.206156009 60 | -2.400 0.827162053 0.222968755 61 | -2.300 0.804049066 0.239165233 62 | -2.200 0.779358684 0.254471803 63 | -2.100 0.753193114 0.268626779 64 | -2.000 0.725679802 0.281388431 65 | -1.900 0.696969061 0.292542221 66 | -1.800 0.667231036 0.301906945 67 | -1.700 0.636652122 0.309339558 68 | -1.600 0.605430961 0.314738516 69 | -1.500 0.573774198 0.318045543 70 | -1.400 0.541892124 0.319245849 71 | -1.300 0.509994383 0.318366852 72 | -1.200 0.478285870 0.315475570 73 | -1.100 0.446962951 0.310674866 74 | -1.000 0.416210105 0.304098784 75 | -0.900 0.386197065 0.295907232 76 | -0.800 0.357076521 0.286280263 77 | -0.700 0.328982392 0.275412215 78 | -0.600 0.302028689 0.263505933 79 | -0.500 0.276308949 0.250767272 80 | -0.400 0.251896179 0.237400053 81 | -0.300 0.228843301 0.223601597 82 | -0.200 0.207183986 0.209558915 83 | -0.100 0.186933854 0.195445624 84 | 0.000 0.168091934 0.181419571 85 | 0.100 0.150642330 0.167621190 86 | 0.200 0.134556018 0.154172511 87 | 0.300 0.119792709 0.141176787 88 | 0.400 0.106302721 0.128718659 89 | 0.500 0.094028817 0.116864772 90 | 0.600 0.082907953 0.105664756 91 | 0.700 0.072872924 0.095152500 92 | 0.800 0.063853860 0.085347620 93 | 0.900 0.055779577 0.076257058 94 | 1.000 0.048578763 0.067876743 95 | 1.100 0.042180992 0.060193257 96 | 1.200 0.036517582 0.053185457 97 | 1.300 0.031522284 0.046826015 98 | 1.400 0.027131832 0.041082856 99 | 1.500 0.023286351 0.035920459 100 | 1.600 0.019929640 0.031301023 101 | 1.700 0.017009350 0.027185487 102 | 1.800 0.014477062 0.023534398 103 | 1.900 0.012288293 0.020308645 104 | 2.000 0.010402429 0.017470054 105 | 2.100 0.008782605 0.014981856 106 | 2.200 0.007395547 0.012809046 107 | 2.300 0.006211384 0.010918644 108 | 2.400 0.005203434 0.009279861 109 | 2.500 0.004347977 0.007864200 110 | 2.600 0.003624031 0.006645482 111 | 2.700 0.003013114 0.005599836 112 | 2.800 0.002499018 0.004705636 113 | 2.900 0.002067590 0.003943413 114 | 3.000 0.001706520 0.003295741 115 | 3.100 0.001405143 0.002747112 116 | 3.200 0.001154255 0.002283795 117 | 3.300 0.000945945 0.001893694 118 | 3.400 0.000773431 0.001566204 119 | 3.500 0.000630927 0.001292071 120 | 3.600 0.000513508 0.001063253 121 | 3.700 0.000416999 0.000872795 122 | 3.800 0.000337871 0.000714702 123 | 3.900 0.000273152 0.000583831 124 | 4.000 0.000220344 0.000475784 125 | 4.100 0.000177359 0.000386816 126 | 4.200 0.000142452 0.000313749 127 | 4.300 0.000114170 0.000253894 128 | 4.400 0.000091308 0.000204987 129 | 4.500 0.000072871 0.000165125 130 | 4.600 0.000058035 0.000132716 131 | 4.700 0.000046124 0.000106431 132 | 4.800 0.000036582 0.000085163 133 | 4.900 0.000028955 0.000067996 134 | 5.000 0.000022872 0.000054172 135 | 5.100 0.000018030 0.000043066 136 | 5.200 0.000014185 0.000034164 137 | 5.300 0.000011138 0.000027045 138 | 5.400 0.000008728 0.000021365 139 | 5.500 0.000006826 0.000016843 140 | 5.600 0.000005328 0.000013250 141 | 5.700 0.000004151 0.000010403 142 | 5.800 0.000003228 0.000008151 143 | 5.900 0.000002505 0.000006374 144 | 6.000 0.000001941 0.000004974 145 | 6.100 0.000001501 0.000003874 146 | 6.200 0.000001158 0.000003011 147 | 6.300 0.000000892 0.000002336 148 | 6.400 0.000000686 0.000001809 149 | 6.500 0.000000527 0.000001398 150 | 6.600 0.000000403 0.000001078 151 | 6.700 0.000000308 0.000000830 152 | 6.800 0.000000235 0.000000638 153 | 6.900 0.000000179 0.000000489 154 | 7.000 0.000000136 0.000000375 155 | 7.100 0.000000104 0.000000286 156 | 7.200 0.000000079 0.000000218 157 | 7.300 0.000000059 0.000000166 158 | 7.400 0.000000045 0.000000126 159 | 7.500 0.000000034 0.000000096 160 | 7.600 0.000000025 0.000000073 161 | 7.700 0.000000019 0.000000055 162 | 7.800 0.000000014 0.000000041 163 | 7.900 0.000000011 0.000000031 164 | 8.000 0.000000008 0.000000023 165 | -------------------------------------------------------------------------------- /include/admutils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #define IDSIZE 40 5 | 6 | #ifndef ADMUTILS 7 | 8 | typedef struct 9 | { 10 | char ID[IDSIZE]; 11 | char *egroup; 12 | char gender; /* 'M' or 'F' */ 13 | double theta_mode; /* most likely theta on mesh */ 14 | double lambda_mode; /* mean of log10(lambda) from probability distribution */ 15 | 16 | double Xtheta_mode; /* most likely theta on mesh */ 17 | double Xlambda_mode; /* most likely lambda on mesh */ 18 | 19 | int idnum; 20 | int affstatus; /* affected status */ 21 | int ignore; /* YES => do not use */ 22 | int flag; 23 | double thetatrue; 24 | double Xthetatrue; 25 | double lambdatrue; 26 | double Xlambdatrue; 27 | double totgamms[3]; 28 | double totscore; 29 | double rawqval; 30 | double qval; 31 | } Indiv; 32 | 33 | typedef struct 34 | { 35 | char ID[IDSIZE]; 36 | int chrom; 37 | char cchrom[6]; 38 | double genpos; 39 | double physpos; 40 | double aa_cauc_freq; /* frequencies in ancestral pop to AA */ 41 | double aa_af_freq; 42 | double cauc_freq; /* frequencies of variant allele */ 43 | double af_freq; 44 | double cftrue; 45 | double aftrue; 46 | double aa_cftrue; 47 | double aa_aftrue; 48 | int markernum; /* marker number */ 49 | char *pbuff; 50 | char *ebuff; /* for random ethnic path */ 51 | int isfake; /* 1 if fake marker else 0 */ 52 | int isrfake; 53 | /* real marker treated as fake. Used for 2D scoring */ 54 | int ignore; 55 | int ngtypes; /* number of gtypes */ 56 | int *gtypes; 57 | int af_nn[2]; 58 | int cauc_nn[2]; 59 | double *modelscores; 60 | double *totmodelscores; 61 | double score; 62 | double weight; 63 | double estgenpos; 64 | double estdis; 65 | double dis; 66 | double esum; 67 | double lsum; 68 | double gpsum; 69 | double gpnum; 70 | void *pcupt; 71 | int tagnumber; 72 | char alleles[2]; 73 | int chimpfudge; 74 | } SNP; 75 | 76 | typedef struct 77 | { 78 | char ID[IDSIZE]; 79 | Indiv *father; 80 | Indiv *mother; 81 | Indiv *child; 82 | int findex; 83 | int mindex; 84 | int cindex; 85 | int base; 86 | } TRIO; 87 | // ?index into original Indiv array. base is work variable used in phasetrio to store init index in new array 88 | 89 | typedef struct 90 | { 91 | char gname[IDSIZE]; 92 | SNP **snpm; 93 | Indiv **indivm; 94 | int numsnps; 95 | int numindivs; 96 | int rlen; 97 | int fdes; 98 | int snpindex; 99 | unsigned char *buff; 100 | } genofile; 101 | 102 | typedef struct 103 | { 104 | double xd[4]; 105 | double xc[9]; 106 | double ap1; 107 | double ap2; 108 | double cp1; 109 | double cp2; 110 | double rpowersum; 111 | double crpowersum; 112 | double gammasum[2]; 113 | double gammanum[2]; 114 | int pubfmodern; 115 | } SNPMC; 116 | // gammasum for cases/controls 117 | #endif 118 | #define ADMUTILS 119 | 120 | void loadstats (FILE * statsfile, Indiv * indiv_array, int *numindivs); 121 | void loadXstats (FILE * Xstatsfile, Indiv * indiv_array, int numindivs, 122 | int *numloaded); 123 | 124 | void sett1 (double *tt, double theta, int numstates); 125 | void sett1r (double *tt, double theta, int numstate, double risks); 126 | void gettln (SNP * cupt, Indiv * indx, 127 | double *ptheta, double *plambda, int *pnumstates, int *pignore); 128 | 129 | void puttln (SNP * cupt, Indiv * indx, double theta, double lambda); 130 | 131 | 132 | /* UTILITY FUNCTIONS */ 133 | 134 | int countcol (char *fname); 135 | int countcolumns (FILE * fp); 136 | 137 | void fataly (const char *name); 138 | int compare_doubles (const void *a, const void *b); 139 | 140 | void pcheck (char *name, char x); 141 | void printm (double **M, int numstates); 142 | int numvalids (Indiv * indx, SNP ** snpmarkers, int fc, int lc); 143 | void gethpos (int *fc, int *lc, SNP ** snpm, int numsnps, 144 | int xchrom, int lo, int hi); 145 | int numvalidgtypes (SNP * cupt); 146 | double malefreq (Indiv ** indivmarkers, int numindivs); 147 | int isimatch (int a, int b); 148 | void makedir (char *dirname); 149 | int indxindex (char **namelist, int len, char *strid); 150 | int indindex (Indiv ** indivmarkers, int numindivs, char *indid); 151 | int snpindex (SNP ** snpmarkers, int numsnps, char *snpid); 152 | void inddupcheck (Indiv ** indivmarkers, int numindivs); 153 | void freesnpindex (); 154 | int ignoresnp (SNP * cupt); 155 | double entrop (double *a, int n); 156 | double xxlog2 (double t); 157 | void testnan (double *a, int n); 158 | void hap2dip (SNP * cupt); 159 | void flipalleles (SNP * cupt); 160 | void flipalleles_phased (SNP * cupt); 161 | int getgtypes (SNP * cupt, int k); 162 | void putgtypes (SNP * cupt, int k, int val); 163 | int getep (SNP * cupt, int k); 164 | void putep (SNP * cupt, int k, int val); 165 | int hasharr (char **xarr, int nxarr); 166 | void wbuff (unsigned char *buff, int num, int g); 167 | int rbuff (unsigned char *buff, int num); 168 | int ridfile (char *fname); 169 | double hwcheck (SNP * cupt, double *cc); 170 | double hwcheckx (SNP * cupt, Indiv ** indm, double *cc); 171 | void cntit (double *xc, SNP * cupt1, SNP * cupt2); 172 | // dup routines 173 | void setfastdupnum (int num); 174 | void setfastdupthresh (double thresh, double kill); 175 | void killxhets (SNP ** snpmarkers, Indiv ** indivmarkers, int numsnps, 176 | int numindivs); 177 | void fastdupcheck (SNP ** snpmarkers, Indiv ** indivmarkers, int numsnps, 178 | int numindivs); 179 | int grabgtypes (int *gtypes, SNP * cupt, int numindivs); 180 | int kcode (int *w, int len, int base); 181 | void cdup (SNP ** snpm, Indiv ** indm, int nsnp, int *buff, int lbuff); 182 | void printdup (SNP ** snpm, int nsnp, Indiv * inda, Indiv * indb, int nmatch, 183 | int nnomatch); 184 | void killdup (Indiv * inda, Indiv * indb, SNP ** snpm, int nsnp); 185 | double kurtosis (double *a, int n); 186 | int getlist (char *name, char **list); 187 | void printvers (char *progname, char *vers); 188 | int numvalidind (Indiv ** indivmarkers, int numind); 189 | void numvalidgtallind (int *x, SNP ** snpm, int numsnps, int numind); 190 | int numvalidgtind (SNP ** snpm, int numsnps, int ind); 191 | int numvalidgt (Indiv ** indivmarkers, SNP * cupt); 192 | int numvalidgtx (Indiv ** indivmarkers, SNP * cupt, int affst); 193 | int isxmale (SNP * cupt, Indiv * indx); 194 | 195 | void printmatz (double *ww, char **eglist, int n); 196 | void printmatz5 (double *ww, char **eglist, int n); 197 | void printmatz10 (double *ww, char **eglist, int n); 198 | char *get3 (char *ss); 199 | char *getshort (char *ss, int n); 200 | 201 | 202 | #undef max 203 | #define max(A,B) ((A) > (B) ? (A) : (B)) 204 | 205 | #define MAXNUMR 200 206 | // max number models 207 | 208 | #define CNULL '\0' 209 | -------------------------------------------------------------------------------- /bin/ploteig: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/perl -w 2 | 3 | ### ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-g gstem] [-o outfile] [-x] [-k] [-y] [-z sep] -r colorstring -m xmul -n ymul 4 | use Getopt::Std ; 5 | use File::Basename ; 6 | 7 | ## pops : separated -x = make postscript and pdf -z use another separator 8 | ## -k keep intermediate files 9 | ## NEW if pops is a file names are read one per line 10 | ## scaling on x, y axes 11 | 12 | getopts('i:o:p:c:s:d:z:t:r:m:n:g:xkyq',\%opts) ; 13 | $postscmode = $opts{"x"} ; 14 | $oldkeystyle = $opts{"y"} ; 15 | $nopoptitle = $opts{"q"} ; 16 | $kflag = $opts{"k"} ; 17 | $keepflag = 1 if ($kflag) ; 18 | $keepflag = 1 unless ($postscmode) ; 19 | 20 | $zsep = ":" ; 21 | if (defined $opts{"z"}) { 22 | $zsep = $opts{"z"} ; 23 | $zsep = "\+" if ($zsep eq "+") ; 24 | } 25 | 26 | if (defined $opts{"r"}) { 27 | $colorstr = $opts{"r"} ; 28 | setcolor($colorstr) ; 29 | } 30 | $xmul = $opts{"m"} ; 31 | $xmul = 1 unless (defined $xmul) ; 32 | 33 | $ymul = $opts{"n"} ; 34 | $ymul = 1 unless (defined $ymul) ; 35 | 36 | $title = "" ; 37 | if (defined $opts{"t"}) { 38 | $title = $opts{"t"} ; 39 | } 40 | if (defined $opts{"i"}) { 41 | $infile = $opts{"i"} ; 42 | } 43 | else { 44 | usage() ; 45 | exit 0 ; 46 | } 47 | 48 | open (FF, $infile) || die "can't open $infile\n" ; 49 | @L = () ; 50 | chomp @L ; 51 | $nf = 0 ; 52 | foreach $line (@L) { 53 | next if ($line =~ /\#/) ; 54 | @Z = split " ", $line ; 55 | $x = @Z ; 56 | $nf = $x if ($nf < $x) ; 57 | } 58 | printf "## number of fields: %d\n", $nf ; 59 | $popcol = $nf-1 ; 60 | 61 | 62 | if (defined $opts{"p"}) { 63 | $pops = $opts{"p"} ; 64 | } 65 | else { 66 | die "p parameter compulsory\n" ; 67 | } 68 | 69 | $popsname = setpops ($pops) ; 70 | print "$popsname\n" ; 71 | 72 | $c1 = 1; $c2 =2 ; 73 | if (defined $opts{"c"}) { 74 | $cols = $opts{"c"} ; 75 | ($c1, $c2) = split ":", $cols ; 76 | die "bad c param: $cols\n" unless (defined $cols) ; 77 | } 78 | 79 | $stem = "$infile.$c1:$c2" ; 80 | if (defined $opts{"s"}) { 81 | $stem = $opts{"s"} ; 82 | } 83 | 84 | if (defined $opts{"g"}) { 85 | $stem = $gstem = $opts{"g"} ; 86 | } 87 | 88 | $gstem = "$stem.$popsname" unless (defined $gstem) ; 89 | $gnfile = "$gstem.xtxt" ; 90 | 91 | if (defined $opts{"o"}) { 92 | $gnfile = $opts{"o"} ; 93 | } 94 | 95 | 96 | @T = () ; ## trash 97 | open (GG, ">$gnfile") || die "can't open $gnfile\n" ; 98 | print GG "## " unless ($postscmode) ; 99 | print GG "set terminal postscript color noenhanced\n" ; 100 | print GG "set title \"$title\" \n" ; 101 | print GG "set key outside\n" unless ($oldkeystyle) ; 102 | print GG "set xlabel \"eigenvector $c1\" \n" if ($xmul == 1) ; 103 | print GG "set xlabel \"eigenvector $c1 (* $xmul) \" \n" if ($xmul != 1) ; 104 | print GG "set ylabel \"eigenvector $c2\" \n" if ($ymul == 1) ; 105 | print GG "set ylabel \"eigenvector $c1 (* $ymul) \" \n" if ($ymul != 1) ; 106 | print GG "plot " ; 107 | $np = @P ; 108 | $lastpop = $P[$np-1] ; 109 | $d1 = $c1+1 ; 110 | $d2 = $c2+1 ; 111 | foreach $pop (@P) { 112 | $dfile = "$stem:$pop" ; 113 | push @T, $dfile ; 114 | print GG " \"$dfile\" using (\$$d1)*$xmul:(\$$d2)*$ymul " ; 115 | print GG "notitle " if (defined $nopoptitle) ; 116 | print GG "title \"$pop\" " unless (defined $nopoptitle) ; 117 | if (defined $COL{$pop}) { 118 | $color = $COL{$pop} ; 119 | print GG "lt rgb \"$color\" " ; 120 | } 121 | print GG ", \\\n" unless ($pop eq $lastpop) ; 122 | open (YY, ">$dfile") || die "can't open $dfile\n" ; 123 | foreach $line (@L) { 124 | next if ($line =~ /\#/) ; 125 | @Z = split " ", $line ; 126 | next unless (defined $Z[$popcol]) ; 127 | next unless ($Z[$popcol] eq $pop) ; 128 | print YY "$line\n" ; 129 | } 130 | close YY ; 131 | } 132 | print GG "\n" ; 133 | print GG "## " if ($postscmode) ; 134 | print GG "pause 9999\n" ; 135 | close GG ; 136 | 137 | if ($postscmode) { 138 | $psfile = "$stem.ps" ; 139 | 140 | if ($gnfile =~ /xtxt/) { 141 | $psfile = $gnfile ; 142 | $psfile =~ s/xtxt/ps/ ; 143 | } 144 | system "gnuplot < $gnfile > $psfile" ; 145 | system "/home/np29/bin/fixgreen $psfile" ; 146 | system "ps2pdf $psfile " ; 147 | } 148 | unlink (@T) unless $keepflag ; 149 | 150 | sub setcolor { 151 | my ($colorst) = @_ ; 152 | local ($cp, $pop, $color, @CP, $line) ; 153 | if (-r $colorst) { 154 | open (C1, $colorst) || die "can't open $colorst\n" ; 155 | foreach $line () { 156 | chomp $line ; 157 | ($pop, $color) = split " ", $line ; 158 | next if ($pop =~ /\#/) ; 159 | next unless (defined $color) ; 160 | print STDERR "setting color for $pop to $color\n" ; 161 | $COL{$pop} = $color ; 162 | } 163 | close C1 ; 164 | return ; 165 | } 166 | 167 | @CP = split " ", $colorst ; 168 | foreach $cp (@CP) { 169 | ($pop, $color) = split ":", $cp ; 170 | $COL{$pop} = $color ; 171 | } 172 | } 173 | 174 | sub usage { 175 | 176 | print "ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k] -c colorstringh [-m xmul] [-n ymul]\n" ; 177 | print "-i eigfile input file first col indiv-id last col population\n" ; 178 | print "## as output by smartpca in outputvecs \n" ; 179 | print "-c a:b a, b columns to plot. 1:2 would be common and leading 2 eigenvectors\n" ; 180 | print "-p pops Populations to plot. : delimited. eg -p Bantu:San:French\n" ; 181 | print "## pops can also be a filename. List populations 1 per line\n" ; 182 | print "[-s stem] stem will start various output files\n" ; 183 | print "[-o ofile] ofile will be gnuplot control file. Should have xtxt suffix\n"; 184 | print "[-x] make ps and pdf files\n" ; 185 | print "[-k] keep various intermediate files although -x set\n" ; 186 | print "## necessary if .xtxt file is to be hand edited\n" ; 187 | print "[-r colorstringpairs or colorstringfile]\n" ; 188 | print "[-g gstem] make gstem.xtxt gstem.ps gstem.pdf \n" ; 189 | print "[-y] put key at top right inside box (old mode)\n" ; 190 | print "[-t] title (legend)\n" ; 191 | 192 | print "The xtxt file is a gnuplot file and can be easily hand edited. Intermediate files 193 | needed if you want to make your own plot\n" ; 194 | 195 | } 196 | sub setpops { 197 | my ($pops) = @_ ; 198 | local (@a, $d, $b, $e) ; 199 | 200 | if (-e $pops) { 201 | open (FF1, $pops) || die "can't open $pops\n" ; 202 | @P = () ; 203 | foreach $line () { 204 | ($a) = split " ", $line ; 205 | next unless (defined $a) ; 206 | next if ($a =~ /\#/) ; 207 | push @P, $a ; 208 | } 209 | $out = join ":", @P ; 210 | print "## pops: $out\n" ; 211 | ($b, $d , $e) = fileparse($pops) ; 212 | return $b ; 213 | } 214 | @P = split $zsep, $pops ; 215 | return $pops ; 216 | 217 | } 218 | -------------------------------------------------------------------------------- /include/mcio.h: -------------------------------------------------------------------------------- 1 | #ifndef _MCIO_ 2 | #define _MCIO_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #define MAXSTR 512 14 | #define LONGSTR 10000 15 | #define MAXFF 50 16 | #define MAXCH 100 17 | #define MTCHROM 90 18 | #define XYCHROM 91 19 | #define BADCHROM 99 20 | #define GDISMUL 1000000 21 | // multiplier for gdis to make integer for sort 22 | 23 | enum outputmodetype 24 | { 25 | ANCESTRYMAP, 26 | EIGENSTRAT, 27 | PED, 28 | PACKEDPED, 29 | PACKEDANCESTRYMAP 30 | }; 31 | 32 | 33 | typedef struct 34 | { 35 | char ID[IDSIZE]; 36 | double gpos; 37 | double ppos; 38 | int chrom; 39 | char cchrom[6]; 40 | int nn[4]; 41 | int ignore; 42 | int isrfake; 43 | char alleles[2]; 44 | int inputrow; 45 | int cuptnum; 46 | int chimpfudge; 47 | } SNPDATA; 48 | 49 | 50 | int numfakes (SNPDATA ** snpraw, int *snpindx, int nreal, double spacing); 51 | double nextmesh (double val, double spacing); 52 | double interp (double l, double r, double x, double al, double ar); 53 | 54 | int 55 | loadsnps (SNP ** snpm, SNPDATA ** snpraw, 56 | int *snpindx, int nreal, double spacing, int *numignore); 57 | 58 | int readsnpdata (SNPDATA ** snpraw, char *fname); 59 | int readinddata (Indiv ** indivmarkers, char *fname); 60 | int readindpeddata (Indiv ** indivmarkers, char *fname); 61 | void pedname (char *name, char *sx0, char *sx1); 62 | 63 | int readtldata (Indiv ** indivmarkers, int numind, char *fname); 64 | int readindval (Indiv ** indivmarkers, int numind, char *fname); 65 | int readfreqdata (SNP ** snpm, int numsnps, char *fname); 66 | void clearsnp (SNP * cupt); 67 | int rmindivs (SNP ** snpm, int numsnps, Indiv ** indivmarkers, int numindivs); 68 | int rmsnps (SNP ** snpm, int numsnps, char *deletesnpoutname); 69 | void clearind (Indiv ** indm, int numind); 70 | void cleartg (Indiv ** indm, int nind); 71 | 72 | double mknn (int *nn, int n0, int n1); 73 | void clearsnpord (); 74 | int getsnps (char *snpfname, SNP *** snpmarkpt, double spacing, 75 | char *badsnpname, int *nignore, int numrisks); 76 | int getsizex (char *fname); 77 | int getindivs (char *indivfname, Indiv *** indmarkpt); 78 | 79 | int setstatus (Indiv ** indm, int numindivs, char *smatch); 80 | int setstatusv (Indiv ** indm, int numindivs, char *smatch, int val); 81 | 82 | long getgenos (char *genoname, SNP ** snpmarkers, Indiv ** indivmarkers, 83 | int numsnps, int numindivs, int nignore); 84 | void getgenos_list (char *genotypelist, SNP ** snpmarkers, 85 | Indiv ** indivmarkers, int numsnps, int numindivs, 86 | int nignore); 87 | void printsnps (char *snpoutfilename, SNP ** snpm, int num, Indiv ** indm, 88 | int printfake, int printvalids); 89 | int checkxval (SNP * cupt, Indiv * indx, int val); 90 | void printdata (char *genooutfilename, char *indoutfilename, 91 | SNP ** snpm, Indiv ** indiv, int numsnps, int numind, 92 | int packmode); 93 | int readgdata (Indiv ** indivmarkers, int numind, char *gname); 94 | int numvalidind (Indiv ** indivmarkers, int numind); 95 | int numvalidgtind (SNP ** snpm, int numsnps, int ind); 96 | int numvalidgt (Indiv ** indivmarkers, SNP * cupt); 97 | int numvalidgtx (Indiv ** indivmarkers, SNP * cupt, int affst); 98 | int getweights (char *fname, SNP ** snpm, int numsnps); 99 | void outpack (char *genooutfilename, SNP ** snpm, Indiv ** indiv, int numsnps, 100 | int numind); 101 | int ispack (char *gname); 102 | int iseigenstrat (char *gname); 103 | void inpack (char *genooutfilename, SNP ** snpm, Indiv ** indiv, int numsnps, 104 | int numind); 105 | int inpack2 (char *genooutfilename, SNP ** snpm, Indiv ** indiv, int numsnps, 106 | int numind); 107 | int ineigenstrat (char *genooutfilename, SNP ** snpm, Indiv ** indiv, 108 | int numsnps, int numind); 109 | void setepath (SNP ** snpm, int n); 110 | void clearepath (char *xpack); 111 | long bigread (int fdes, char *packg, long numbytes); 112 | 113 | // pedfile support 114 | int getpedgenos (char *genoname, SNP ** snpmarkers, Indiv ** indivmarkers, 115 | int numsnps, int numindivs, int nignore); 116 | void genopedcnt (char *genoname, int **gcounts, int nsnp); 117 | 118 | int pedval (char *sx); 119 | int xpedval (char c); 120 | int ptoachrom (char *ss); 121 | 122 | void setgref (int **gcounts, int nsnp, int *gvar, int *gref); 123 | void cleargdata (SNP ** snpmarkers, int numsnps, int numindivs); 124 | void setgenotypename (char **gname, char *iname); 125 | void settersemode (int mode); 126 | 127 | void dobadsnps (SNPDATA ** snpraw, int nreal, char *badsnpname); 128 | int snprawindex (SNPDATA ** snpraw, int nreal, char *sname); 129 | int readsnpmapdata (SNPDATA ** snpraw, char *fname); 130 | int checkfake (char *ss); 131 | void setbadpedignore (); 132 | int setsdpos (SNPDATA * sdpt, int pos); 133 | 134 | void 135 | outeigenstrat (char *snpname, char *indname, char *gname, 136 | SNP ** snpm, Indiv ** indiv, int numsnps, int numind); 137 | 138 | void 139 | outped (char *snpname, char *indname, char *gname, 140 | SNP ** snpm, Indiv ** indiv, int numsnps, int numind, int ogmode); 141 | 142 | void 143 | outpackped (char *snpname, char *indname, char *gname, SNP ** snpm, 144 | Indiv ** indiv, int numsnps, int numind, int ogmode); 145 | 146 | void setbedbuff (char *buff, int *gtypes, int numind); 147 | int bedval (int g); 148 | int str2chrom (char *ss); 149 | 150 | void outindped (char *indname, Indiv ** indiv, int numind, int ogmode); 151 | 152 | void printmap (char *snpname, SNP ** snpm, int numsnps, Indiv ** indiv); 153 | 154 | int maxlinelength (char *fname); 155 | int checksize (int numindivs, int numsnps, enum outputmodetype outputmode); 156 | 157 | void setomode (enum outputmodetype *outmode, char *omode); 158 | 159 | void 160 | outfiles (char *snpname, char *indname, char *gname, SNP ** snpm, 161 | Indiv ** indiv, int numsnps, int numind, int packem, int ogmode); 162 | 163 | void snpdecimate (SNP ** snpm, int nsnp, int decim, int mindis, int maxdis); 164 | void decimate (SNP ** cbuff, int n, int decim, int mindis, int maxdis); 165 | int vvadjust (double *cc, int n, double *pmean); 166 | int killhir2 (SNP ** snpm, int numsnps, int numind, double physlim, 167 | double genlim, double rhothresh); 168 | void freecupt (SNP ** cupt); 169 | void freeped (); 170 | void cntpops (int *count, Indiv ** indm, int numindivs, char **eglist, 171 | int numeg); 172 | void printalleles (SNP * cupt, FILE * fff); 173 | char *getpackgenos (); 174 | void clearpackgenos (); 175 | void setchr (int mode); 176 | void setchimpmode (int mode); 177 | 178 | int genoopenit (genofile ** gfile, char *geno2name, SNP ** snp2m, 179 | Indiv ** indiv2m, int numsnp2, int numindiv2, int nignore); 180 | int genoreadit (genofile * gfile, SNP ** pcupt); 181 | void genocloseit (genofile * gfile); 182 | 183 | void putped (int num); 184 | void getped (int num); 185 | 186 | void logdeletedsnp (char *snpname, char *cmnt, char *deletesnpoutname); 187 | void sortsnps (SNP ** snpa, SNP ** snpb, int n); 188 | void setpordercheck (int mode); 189 | void putsnpordered (int mode); 190 | int getsnpordered (); 191 | 192 | 193 | #endif 194 | -------------------------------------------------------------------------------- /src/nicksrc/sortit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "strsubs.h" 6 | #include "sortit.h" 7 | #include "vsubs.h" 8 | 9 | /** 10 | a simple sort routine 11 | */ 12 | 13 | static double *ttt; 14 | static long *lttt; 15 | static int *ittt; 16 | static int **pttt; 17 | static int plen = 0; 18 | static int *porder = NULL; 19 | 20 | void 21 | setorder (int *pp, int rlen) 22 | { 23 | int *tt; 24 | 25 | if (plen > 0) { 26 | if (porder != NULL) 27 | free (porder); 28 | } 29 | 30 | if (pp == NULL) { 31 | porder = NULL; 32 | plen = rlen; 33 | return; 34 | } 35 | ZALLOC (porder, rlen, int); 36 | ZALLOC (tt, rlen, int); 37 | copyiarr (pp, tt, rlen); 38 | isortit (tt, porder, rlen); 39 | free (tt); 40 | plen = rlen; 41 | } 42 | 43 | double 44 | median (double *aa, int len) 45 | // should be O(len) algorithm 46 | { 47 | double *b, y; 48 | int t, x, a, n; 49 | 50 | ZALLOC (b, len, double); 51 | n = 0; 52 | for (a = 0; a < len; ++a) { 53 | y = aa[a]; 54 | if (isfinite (y)) { 55 | b[n] = y; 56 | ++n; 57 | } 58 | } 59 | if (n == 0) 60 | fatalx ("(median) no valids\n"); 61 | if (n == 1) 62 | return b[0]; 63 | if (n == 2) 64 | return 0.5 * (b[0] + b[1]); 65 | sortit (b, NULL, n); 66 | t = n % 2; 67 | x = n / 2; 68 | y = b[x]; 69 | if (t == 0) 70 | y = 0.5 * (b[x] + b[x - 1]); 71 | 72 | free (b); 73 | // printf("zzmed: %d %d %d %9.3f\n", len, n, x, y) ; 74 | return y; 75 | 76 | 77 | } 78 | 79 | void 80 | sortit (double *a, int *ind, int len) 81 | { 82 | int i, k; 83 | int *inda; 84 | 85 | if (len == 0) 86 | fatalx ("(sortit) len = 0\n"); 87 | ZALLOC (ttt, len, double); 88 | ZALLOC (inda, len, int); 89 | 90 | for (i = 0; i < len; i++) { 91 | inda[i] = i; 92 | } 93 | 94 | copyarr (a, ttt, len); 95 | qsort ((int *) inda, len, sizeof (int), 96 | (int (*)(const void *, const void *)) compit); 97 | 98 | for (i = 0; i < len; i++) { 99 | k = inda[i]; 100 | a[i] = ttt[k]; 101 | } 102 | free (ttt); 103 | if (ind != NULL) 104 | copyiarr (inda, ind, len); 105 | free (inda); 106 | } 107 | 108 | int 109 | compit (int *a1, int *a2) 110 | { 111 | if (ttt[*a1] < ttt[*a2]) 112 | return -1; 113 | if (ttt[*a1] > ttt[*a2]) 114 | return 1; 115 | return 0; 116 | } 117 | 118 | void 119 | lsortit (long *a, int *ind, int len) 120 | { 121 | int i, k; 122 | int *inda; 123 | 124 | if (len == 0) 125 | fatalx ("(lsortit) len = 0\n"); 126 | ZALLOC (lttt, len, long); 127 | ZALLOC (inda, len, int); 128 | 129 | for (i = 0; i < len; i++) { 130 | inda[i] = i; 131 | } 132 | 133 | copylarr (a, lttt, len); 134 | qsort ((int *) inda, len, sizeof (int), 135 | (int (*)(const void *, const void *)) lcompit); 136 | 137 | for (i = 0; i < len; i++) { 138 | k = inda[i]; 139 | a[i] = lttt[k]; 140 | } 141 | free (lttt); 142 | if (ind != NULL) 143 | copyiarr (inda, ind, len); 144 | free (inda); 145 | } 146 | 147 | void 148 | isortit (int *a, int *ind, int len) 149 | { 150 | int i, k; 151 | int *inda; 152 | 153 | if (len == 0) 154 | fatalx ("(isortit) len = 0\n"); 155 | ZALLOC (ittt, len, int); 156 | ZALLOC (inda, len, int); 157 | 158 | for (i = 0; i < len; i++) { 159 | inda[i] = i; 160 | } 161 | 162 | copyiarr (a, ittt, len); 163 | qsort ((int *) inda, len, sizeof (int), 164 | (int (*)(const void *, const void *)) icompit); 165 | 166 | for (i = 0; i < len; i++) { 167 | k = inda[i]; 168 | a[i] = ittt[k]; 169 | } 170 | free (ittt); 171 | if (ind != NULL) 172 | copyiarr (inda, ind, len); 173 | free (inda); 174 | } 175 | 176 | int 177 | lcompit (int *a1, int *a2) 178 | { 179 | if (lttt[*a1] < lttt[*a2]) 180 | return -1; 181 | if (lttt[*a1] > lttt[*a2]) 182 | return 1; 183 | return 0; 184 | } 185 | 186 | int 187 | icompit (int *a1, int *a2) 188 | { 189 | if (ittt[*a1] < ittt[*a2]) 190 | return -1; 191 | if (ittt[*a1] > ittt[*a2]) 192 | return 1; 193 | return 0; 194 | } 195 | 196 | void 197 | invperm (int *a, int *b, int n) 198 | { 199 | 200 | /** 201 | a, b can be same 202 | */ 203 | int i, j; 204 | int *x; 205 | 206 | if (n == 0) 207 | return; 208 | ZALLOC (x, n, int); 209 | 210 | ivclear (x, -1, n); 211 | for (i = 0; i < n; i++) { 212 | j = b[i]; 213 | x[j] = i; 214 | } 215 | copyiarr (x, a, n); 216 | free (x); 217 | } 218 | 219 | void 220 | ipsortit (int **a, int *ind, int len, int rlen) 221 | { 222 | ipsortitp (a, ind, len, rlen, NULL); 223 | 224 | } 225 | 226 | void 227 | ipsortitp (int **a, int *ind, int len, int rlen, int *order) 228 | 229 | /** 230 | sort integer array pointers 231 | rows of array are sorted in lexicographical order 232 | 233 | compiarr can be called outside the sort 234 | */ 235 | { 236 | int i, k; 237 | int *inda; 238 | 239 | if (len == 0) 240 | fatalx ("(ipsortit) len = 0\n"); 241 | ZALLOC (pttt, len, int *); 242 | ZALLOC (inda, len, int); 243 | 244 | setorder (order, rlen); // order defines order as sorted in ascending order. 245 | 246 | for (i = 0; i < len; i++) { 247 | if (a[i] == NULL) 248 | fatalx ("(ipsortit) array pointer %d NULL\n", i); 249 | inda[i] = i; 250 | } 251 | 252 | copyiparr (a, pttt, len); 253 | qsort ((int *) inda, len, 254 | sizeof (int), (int (*)(const void *, const void *)) ipcompit); 255 | 256 | for (i = 0; i < len; i++) { 257 | k = inda[i]; 258 | a[i] = pttt[k]; 259 | } 260 | if (ind != NULL) 261 | copyiarr (inda, ind, len); 262 | free (inda); 263 | free (pttt); 264 | } 265 | 266 | int 267 | ipcompit (int *a1, int *a2) 268 | { 269 | int l; 270 | l = compiarr (pttt[*a1], pttt[*a2], plen); 271 | return l; 272 | } 273 | 274 | int 275 | compiarr (int *a, int *b, int len) 276 | { 277 | int i, k; 278 | for (i = 0; i < len; i++) { 279 | k = i; 280 | if (porder != NULL) 281 | k = porder[i]; 282 | if (a[k] < b[k]) 283 | return -1; 284 | if (a[k] > b[k]) 285 | return 1; 286 | } 287 | return 0; 288 | } 289 | 290 | int 291 | comparr (double *a, double *b, int len) 292 | { 293 | int i, k; 294 | for (i = 0; i < len; i++) { 295 | k = i; 296 | if (porder != NULL) 297 | k = porder[i]; 298 | if (a[k] < b[k]) 299 | return -1; 300 | if (a[k] > b[k]) 301 | return 1; 302 | } 303 | return 0; 304 | } 305 | 306 | 307 | void 308 | mkirank (int *rank, int *xin, int n) 309 | // faster to call isortit 310 | { 311 | double *a; 312 | 313 | ZALLOC (a, n, double); 314 | floatit (a, xin, n); 315 | mkrank (rank, a, n); 316 | free (a); 317 | } 318 | 319 | void 320 | mkrank (int *rank, double *xin, int n) 321 | 322 | /** rank 0:n-1 323 | largest element k has rank[k] = 0, smallest, rank[k] = n-1 324 | */ 325 | { 326 | int i; 327 | double *a; 328 | int *ind; 329 | 330 | ZALLOC (a, n, double); 331 | ZALLOC (ind, n, int); 332 | 333 | vst (a, xin, -1.0, n); 334 | sortit (a, ind, n); 335 | 336 | for (i = 0; i < n; i++) { 337 | rank[ind[i]] = i; 338 | } 339 | 340 | free (a); 341 | free (ind); 342 | 343 | 344 | } 345 | -------------------------------------------------------------------------------- /src/eigensrc/eigx.c: -------------------------------------------------------------------------------- 1 | // LAPACK-using version of eigensrc/eigx.f, easier to build on OS X 2 | // Christopher Chang (chrchang@alumni.caltech.edu), BGI Cognitive Genomics Lab 3 | 4 | #include 5 | #include 6 | 7 | #ifdef __APPLE__ 8 | #include 9 | #else 10 | 11 | #if __LP64__ 12 | typedef int __CLPK_integer; 13 | #else 14 | typedef long int __CLPK_integer; 15 | #endif 16 | typedef double __CLPK_doublereal; 17 | 18 | int dspev_ (char *jobz, char *uplo, __CLPK_integer * n, 19 | __CLPK_doublereal * ap, __CLPK_doublereal * w, 20 | __CLPK_doublereal * z__, __CLPK_integer * ldz, 21 | __CLPK_doublereal * work, __CLPK_integer * info); 22 | 23 | int dpotrf_ (char *uplo, __CLPK_integer * n, __CLPK_doublereal * a, 24 | __CLPK_integer * lda, __CLPK_integer * info); 25 | 26 | int dgetrf_ (__CLPK_integer * m, __CLPK_integer * n, __CLPK_doublereal * a, 27 | __CLPK_integer * lda, __CLPK_integer * ipiv, 28 | __CLPK_integer * info); 29 | 30 | int dgetri_ (__CLPK_integer * n, __CLPK_doublereal * a, __CLPK_integer * lda, 31 | __CLPK_integer * ipiv, __CLPK_doublereal * work, 32 | __CLPK_integer * lwork, __CLPK_integer * info); 33 | 34 | int dgetrs_ (char *trans, __CLPK_integer * n, __CLPK_integer * nrhs, 35 | __CLPK_doublereal * a, __CLPK_integer * lda, 36 | __CLPK_integer * ipiv, __CLPK_doublereal * b, 37 | __CLPK_integer * ldb, __CLPK_integer * info); 38 | 39 | int dsygv_ (__CLPK_integer * itype, char *jobz, char *uplo, 40 | __CLPK_integer * n, __CLPK_doublereal * a, __CLPK_integer * lda, 41 | __CLPK_doublereal * b, __CLPK_integer * ldb, 42 | __CLPK_doublereal * w, __CLPK_doublereal * work, 43 | __CLPK_integer * lwork, __CLPK_integer * info); 44 | #endif // end !__APPLE__ 45 | 46 | void 47 | mem_error () 48 | { 49 | fprintf (stderr, "CM\n"); 50 | exit (1); 51 | } 52 | 53 | void 54 | inverse_error (char *procname, int info) 55 | { 56 | if (info < 0) { 57 | fprintf (stderr, "error (%s): illegal argument %d\n", procname, -info); 58 | } 59 | else { 60 | fprintf (stderr, "error (%s): singular matrix %d\n", procname, info); 61 | } 62 | exit (1); 63 | } 64 | 65 | void 66 | eigx_ (double *pmat, double *ev, __CLPK_integer * n) 67 | { 68 | char jobz = 'N'; 69 | char uplo = 'L'; 70 | __CLPK_integer ldz = *n; 71 | __CLPK_integer info; 72 | double *z; 73 | double *work; 74 | z = (double *) malloc (ldz * ldz * sizeof (double)); 75 | if (!z) { 76 | mem_error (); 77 | } 78 | work = (double *) malloc (3 * ldz * sizeof (double)); 79 | if (!work) { 80 | free (z); 81 | mem_error (); 82 | } 83 | dspev_ (&jobz, &uplo, n, pmat, ev, z, &ldz, work, &info); 84 | free (z); 85 | free (work); 86 | if (info) { 87 | #if __LP64__ 88 | fprintf (stderr, "INFO: %d\n", info); 89 | #else 90 | fprintf (stderr, "INFO: %ld\n", info); 91 | #endif 92 | exit (1); 93 | } 94 | } 95 | 96 | void 97 | eigxv_ (double *pmat, double *eval, double *evec, __CLPK_integer * n) 98 | { 99 | char jobz = 'V'; 100 | char uplo = 'L'; 101 | __CLPK_integer ldz = *n; 102 | __CLPK_integer info; 103 | double *work = (double *) malloc (3 * ldz * sizeof (double)); 104 | if (!work) { 105 | mem_error (); 106 | } 107 | dspev_ (&jobz, &uplo, n, pmat, eval, evec, &ldz, work, &info); 108 | free (work); 109 | if (info) { 110 | #if __LP64__ 111 | fprintf (stderr, "INFO: %d\n", info); 112 | #else 113 | fprintf (stderr, "INFO: %ld\n", info); 114 | #endif 115 | exit (1); 116 | } 117 | } 118 | 119 | void 120 | cdc_ (double *pmat, __CLPK_integer * n) 121 | { 122 | char uplo = 'L'; 123 | __CLPK_integer lda = *n; 124 | __CLPK_integer info; 125 | dpotrf_ (&uplo, n, pmat, &lda, &info); 126 | if (info) { 127 | if (info < 0) { 128 | #if __LP64__ 129 | fprintf (stderr, "error (CDC): illegal argument %d\n", -info); 130 | #else 131 | fprintf (stderr, "error (CDC): illegal argument %ld\n", -info); 132 | #endif 133 | } 134 | else { 135 | #if __LP64__ 136 | fprintf (stderr, "error (CDC): minor not positive definite %d\n", info); 137 | #else 138 | fprintf (stderr, "error (CDC): minor not positive definite %ld\n", 139 | info); 140 | #endif 141 | } 142 | exit (1); 143 | } 144 | } 145 | 146 | void 147 | inverse_ (double *pmat, __CLPK_integer * n) 148 | { 149 | __CLPK_integer lwork = (*n) * (*n); 150 | __CLPK_integer info; 151 | __CLPK_integer *ipiv; 152 | double *work; 153 | ipiv = (__CLPK_integer *) malloc ((*n) * sizeof (__CLPK_integer)); 154 | if (!ipiv) { 155 | mem_error (); 156 | } 157 | work = (double *) malloc (lwork * sizeof (double)); 158 | if (!work) { 159 | free (ipiv); 160 | mem_error (); 161 | } 162 | dgetrf_ (n, n, pmat, n, ipiv, &info); 163 | if (info) { 164 | free (ipiv); 165 | free (work); 166 | inverse_error ("INVERSE", info); 167 | exit (1); 168 | } 169 | dgetri_ (n, pmat, n, ipiv, work, &lwork, &info); 170 | free (ipiv); 171 | free (work); 172 | if (info) { 173 | inverse_error ("INVERSE", info); 174 | } 175 | } 176 | 177 | void 178 | solve_ (double *pmat, double *v, __CLPK_integer * n) 179 | { 180 | __CLPK_integer ldb = *n; 181 | char trans = 'N'; 182 | __CLPK_integer nrhs = 1; 183 | double *work; 184 | __CLPK_integer *ipiv; 185 | __CLPK_integer info; 186 | ipiv = (__CLPK_integer *) malloc (ldb * sizeof (__CLPK_integer)); 187 | if (!ipiv) { 188 | mem_error (); 189 | } 190 | work = (double *) malloc (ldb * ldb * sizeof (double)); 191 | if (!work) { 192 | free (ipiv); 193 | mem_error (); 194 | } 195 | dgetrf_ (n, n, pmat, n, ipiv, &info); 196 | if (info) { 197 | free (ipiv); 198 | free (work); 199 | inverse_error ("SOLVE", info); 200 | } 201 | dgetrs_ (&trans, n, &nrhs, pmat, n, ipiv, v, &ldb, &info); 202 | free (ipiv); 203 | free (work); 204 | if (info < 0) { 205 | inverse_error ("SOLVE", info); 206 | } 207 | } 208 | 209 | void 210 | geneigsolve_ (double *pmat, double *qmat, double *eval, __CLPK_integer * n) 211 | { 212 | __CLPK_integer lwork = (*n) * (*n); 213 | double *work = (double *) malloc (lwork * sizeof (double)); 214 | __CLPK_integer wood_elf = 1; // Sameer Merchant memorial temporary variable 215 | __CLPK_integer info; 216 | if (!work) { 217 | mem_error (); 218 | } 219 | dsygv_ (&wood_elf, "V", "U", n, pmat, n, qmat, n, eval, work, &lwork, 220 | &info); 221 | free (work); 222 | if (info && (info <= 2 * (*n))) { 223 | if (info < 0) { 224 | #if __LP64__ 225 | fprintf (stderr, "error (GENEIGSOLVE): illegal argument %d\n", -info); 226 | #else 227 | fprintf (stderr, "error (GENEIGSOLVE): illegal argument %ld\n", -info); 228 | #endif 229 | } 230 | else if (info <= (*n)) { 231 | #if __LP64__ 232 | fprintf (stderr, "error (GENEIGSOLVE): failure to converge %d\n", info); 233 | #else 234 | fprintf (stderr, "error (GENEIGSOLVE): failure to converge %ld\n", 235 | info); 236 | #endif 237 | } 238 | else { 239 | #if __LP64__ 240 | fprintf (stderr, "error (GENEIGSOLVE): not positive definite %d\n", 241 | info); 242 | #else 243 | fprintf (stderr, "error (GENEIGSOLVE): not positive definite %ld\n", 244 | info); 245 | #endif 246 | } 247 | exit (1); 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /src/baseprog.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "admutils.h" 14 | #include "mcio.h" 15 | #include "mcmcpars.h" 16 | #include "egsubs.h" 17 | #include "exclude.h" 18 | 19 | 20 | #define WVERSION "420" 21 | 22 | // badpairsname added 23 | 24 | /** 25 | does nothing but read the data 26 | and print snps 27 | sometimes a good place to start !! 28 | 29 | New I/O (mcio.c) added 30 | New admutils (snpindex hash) 31 | mcio bug fixed (large files) 32 | */ 33 | 34 | 35 | #define MAXFL 50 36 | #define MAXSTR 512 37 | 38 | extern int packmode; 39 | 40 | char *trashdir = "/var/tmp"; 41 | extern int verbose; 42 | int qtmode = NO; 43 | Indiv **indivmarkers; 44 | SNP **snpmarkers; 45 | int numsnps, numindivs; 46 | 47 | char *genotypename = NULL; 48 | char *snpname = NULL; 49 | char *genooutfilename = NULL; 50 | char *indoutfilename = NULL; 51 | char *indivname = NULL; 52 | char *badsnpname = NULL; 53 | char *goodsnpname = NULL; 54 | char *badpairsname = NULL; 55 | char *markername = NULL; 56 | char *idname = NULL; 57 | 58 | char *outputname = NULL; 59 | FILE *ofile; 60 | 61 | double fakespacing = 0.0; 62 | 63 | char unknowngender = 'U'; 64 | 65 | void readcommands (int argc, char **argv); 66 | void dophyscheck (SNP ** snpm, int numsnps); 67 | 68 | int 69 | main (int argc, char **argv) 70 | { 71 | 72 | int i, j, k, g; 73 | SNP *cupt; 74 | Indiv *indx; 75 | int ch1, ch2; 76 | 77 | int numvind, nignore, numrisks = 1; 78 | int markernum, idnum; 79 | 80 | ofile = stdout; 81 | packmode = YES; 82 | readcommands (argc, argv); 83 | if (indivname == NULL) { 84 | printf ("no indivname\n"); 85 | return 0; 86 | } 87 | if (outputname != NULL) 88 | openit (outputname, &ofile, "w"); 89 | 90 | numsnps = 91 | getsnps (snpname, &snpmarkers, fakespacing, badsnpname, &nignore, 92 | numrisks); 93 | 94 | // fakespacing 0.0 (default) 95 | 96 | numindivs = getindivs (indivname, &indivmarkers); 97 | setstatus (indivmarkers, numindivs, "Case"); 98 | 99 | setgenotypename (&genotypename, indivname); 100 | 101 | printf ("genotypename: %s\n", genotypename); 102 | 103 | if (genotypename != NULL) { 104 | getgenos (genotypename, snpmarkers, indivmarkers, 105 | numsnps, numindivs, nignore); 106 | 107 | /** 108 | if (badpairsname != NULL) { 109 | loadbadpsc(snpmarkers, numsnps, NO, goodsnpname) ; 110 | dobadpairs(badpairsname, snpmarkers, numsnps) ; 111 | } 112 | */ 113 | } 114 | dophyscheck (snpmarkers, numsnps); 115 | 116 | numvind = numvalidind (indivmarkers, numindivs); 117 | printf ("\n\n"); 118 | printf ("numindivs: %d valid: %d numsnps: %d nignore: %d\n", 119 | numindivs, numvind, numsnps, nignore); 120 | 121 | if (verbose) { 122 | for (i = 0; i < numindivs; ++i) { 123 | indx = indivmarkers[i]; 124 | printf ("%20s ", indx->ID); 125 | for (j = 0; j < numsnps; ++j) { 126 | cupt = snpmarkers[j]; 127 | if (cupt->ignore) 128 | continue; 129 | g = getgtypes (cupt, i); 130 | if (g < 0) 131 | g = 9; 132 | printf ("%1d", g); 133 | } 134 | printf (" %20s", indx->egroup); 135 | printnl (); 136 | } 137 | } 138 | // numsnps includes fakes 139 | 140 | if (markername != NULL) { 141 | markernum = snpindex (snpmarkers, numsnps, markername); 142 | if (markernum < 0) 143 | fatalx ("markername %s not found\n", markername); 144 | cupt = snpmarkers[markernum]; 145 | printf ("markername: %s %d %9.3f %12.0f\n", 146 | cupt->ID, cupt->chrom, cupt->genpos, cupt->physpos); 147 | for (i = 0; i < numindivs; ++i) { 148 | indx = indivmarkers[i]; 149 | g = getgtypes (cupt, i); 150 | printf ("%20s %20s %2d\n", cupt->ID, indx->ID, g); 151 | } 152 | } 153 | 154 | if (idname != NULL) { 155 | idnum = indindex (indivmarkers, numindivs, idname); 156 | if (idnum < 0) 157 | fatalx ("idname %s not found\n", idname); 158 | indx = indivmarkers[idnum]; 159 | printf ("idname: %20s %c %20s\n", 160 | indx->ID, indx->gender, indx->egroup); 161 | for (j = 0; j < numsnps; ++j) { 162 | cupt = snpmarkers[j]; 163 | if (cupt->ignore) 164 | continue; 165 | g = getgtypes (cupt, idnum); 166 | printf ("%20s %20s %2d", cupt->ID, indx->ID, g); 167 | printf (" %3d %12.0f", cupt->chrom, cupt->physpos); 168 | printf (" %c %c", cupt->alleles[0], cupt->alleles[1]); 169 | printnl (); 170 | } 171 | } 172 | 173 | /** 174 | if (genotypename != NULL) { 175 | printdata(genooutfilename, indoutfilename, snpmarkers, indivmarkers, numsnps, numindivs, NO) ; 176 | } 177 | */ 178 | 179 | printf ("##end of run\n"); 180 | return 0; 181 | } 182 | 183 | void 184 | readcommands (int argc, char **argv) 185 | { 186 | int i, haploid = 0; 187 | char *parname = NULL; 188 | phandle *ph; 189 | char str[5000]; 190 | char *tempname; 191 | int n; 192 | 193 | while ((i = getopt (argc, argv, "p:vV")) != -1) { 194 | 195 | switch (i) { 196 | 197 | case 'p': 198 | parname = strdup (optarg); 199 | break; 200 | 201 | case 'v': 202 | printf ("version: %s\n", WVERSION); 203 | break; 204 | 205 | case 'V': 206 | verbose = YES; 207 | break; 208 | 209 | case '?': 210 | printf ("Usage: bad params.... \n"); 211 | fatalx ("bad params\n"); 212 | } 213 | } 214 | 215 | 216 | pcheck (parname, 'p'); 217 | printf ("parameter file: %s\n", parname); 218 | ph = openpars (parname); 219 | dostrsub (ph); 220 | 221 | /** 222 | DIR2: /fg/nfiles/admixdata/ms2 223 | SSSS: DIR2/outfiles 224 | genotypename: DIR2/autos_ccshad_fakes 225 | eglistname: DIR2/eurlist 226 | output: eurout 227 | */ 228 | getint (ph, "packmode:", &packmode); // controls internals 229 | 230 | getstring (ph, "genotypename:", &genotypename); 231 | getstring (ph, "genooutfilename:", &genooutfilename); 232 | getstring (ph, "indoutfilename:", &indoutfilename); 233 | getstring (ph, "snpname:", &snpname); 234 | getstring (ph, "indivname:", &indivname); 235 | getstring (ph, "output:", &outputname); 236 | getstring (ph, "badsnpname:", &badsnpname); 237 | getstring (ph, "goodsnpname:", &goodsnpname); 238 | getstring (ph, "badpairsname:", &badpairsname); 239 | getstring (ph, "markername:", &markername); 240 | getstring (ph, "idname:", &idname); 241 | getdbl (ph, "fakespacing:", &fakespacing); 242 | getint (ph, "familynames:", &familynames); 243 | writepars (ph); 244 | closepars (ph); 245 | 246 | } 247 | 248 | void 249 | dophyscheck (SNP ** snpm, int numsnps) 250 | { 251 | // catch places where physpos genpos are in opposite order 252 | SNP *cupt, *cuptold; 253 | int i; 254 | 255 | for (i = 0; i < numsnps; i++) { 256 | cupt = snpm[i]; 257 | if (i == 0) 258 | cuptold = cupt; 259 | if (cupt->isfake) 260 | continue; 261 | if (cupt->ignore) 262 | continue; 263 | if (cupt->chrom == cuptold->chrom) { 264 | if (cupt->physpos < cuptold->physpos) { 265 | printf ("physcheck %20s %15s %12.3f %12.3f %13.0f %13.0f\n", 266 | cuptold->ID, cupt->ID, 267 | cuptold->genpos, cupt->genpos, 268 | cuptold->physpos, cupt->physpos); 269 | } 270 | } 271 | cuptold = cupt; 272 | } 273 | } 274 | --------------------------------------------------------------------------------