├── CONVERTF ├── example.pheno ├── example.bed ├── example.eigenstratgeno ├── example.packedancestrymap ├── example.packedancestrymapgeno ├── example.pedind ├── example.ind ├── example.ind.packedancestrymap ├── par.EIGENSTRAT.PED ├── par.ANCESTRYMAP.EIGENSTRAT ├── par.PACKEDANCESTRYMAP.ANCESTRYMAP ├── example.map ├── par.PED.PACKEDPED ├── example.ped ├── par.PED.EIGENSTRAT ├── example.pedsnp ├── par.PACKEDPED.PACKEDANCESTRYMAP ├── example.perl ├── example.snp ├── example.snp.packedancestrymap ├── ind2pheno.perl └── example.ancestrymapgeno ├── POPGEN ├── HGDP.X.perl ├── elldemo │ ├── ctable │ ├── ebalist │ ├── test4 │ ├── test3 │ ├── aaa.pdf │ ├── sicaa.phyl │ ├── doplot │ ├── aaa:Sicily_EBA │ ├── par1 │ ├── aaa:Lithuanian │ ├── q1 │ ├── q2 │ ├── q3 │ ├── aaa.xtxt │ ├── q0 │ ├── aaa:Georgian │ ├── aaa:Sardinian │ ├── rescale_ell │ └── sicaa.evec ├── example.geno ├── grmjunk.id ├── lsqproject.pdf ├── example.plot.pdf ├── example.eval ├── example.pca ├── example.ind ├── twexample.perl ├── example.plot2.xtxt ├── example.plot.xtxt ├── grmjunk ├── par.example ├── example.evec ├── example.snp ├── example.perl ├── example.log ├── twexample.eval ├── smartpca.info └── twtable ├── EIGENSTRAT ├── example.pheno ├── example.geno ├── example.plot.pdf ├── example.eval ├── example.pca ├── example.chisq ├── example.QTL.chisq ├── example.ind ├── example.QTL.chisq.GC ├── example.chisq.GC ├── example.QTL.ind ├── example.log ├── example.chisq.par ├── example.QTL.chisq.par ├── example.plot.xtxt ├── example.pca.par ├── example.pca.evec ├── example.snp ├── example.oldstyle.perl ├── example.perl └── example.QTL.perl ├── src ├── .gitignore ├── eigensrc │ ├── .gitignore │ ├── pcatoy.c │ ├── smartsubs.c │ ├── exclude.c │ ├── twstats.c │ ├── eigsubs.c │ └── eigx.c ├── nicksrc │ ├── Makefile │ ├── gauss.c │ ├── LICENSE.txt │ └── sortit.c ├── ksrc │ ├── Makefile │ ├── kjg_fpca.c │ └── kjg_gsl.c ├── gval.h ├── README ├── LICENSE.txt ├── Makefile ├── egsubs.c ├── h2d.c ├── gval.c ├── twsubs.c ├── smarttables │ └── twtable └── baseprog.c ├── include ├── globals.h ├── packit.h ├── smartsubs.h ├── nicklib.h ├── gval.h ├── egsubs.h ├── eigsubs.h ├── badpairs.h ├── exclude.h ├── workqueue.h ├── ldsubs.h ├── xsearch.h ├── sortit.h ├── getpars.h ├── regsubs.h ├── mcmcpars.h ├── xpsubs.h ├── not-thread-h ├── kjg_fpca.h ├── linsubs.h ├── ranmath.h ├── kjg_gsl.h ├── strsubs.h ├── statsubs.h ├── admutils.h └── mcio.h ├── bin ├── .gitignore ├── evec2pca.perl ├── gc.perl ├── rescale_ell ├── smarteigenstrat.perl ├── evec2pca-ped.perl ├── smartpca.perl └── ploteig ├── .gitignore ├── LICENSE.txt └── README /CONVERTF/example.pheno: -------------------------------------------------------------------------------- 1 | 11000 2 | -------------------------------------------------------------------------------- /POPGEN/HGDP.X.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin 2 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.pheno: -------------------------------------------------------------------------------- 1 | 11000 2 | -------------------------------------------------------------------------------- /POPGEN/elldemo/ctable: -------------------------------------------------------------------------------- 1 | Sicily_EBA black 2 | base gray70 3 | -------------------------------------------------------------------------------- /POPGEN/elldemo/ebalist: -------------------------------------------------------------------------------- 1 | I3122 2 | I11442 3 | I7796 4 | I7800 5 | I7807 6 | -------------------------------------------------------------------------------- /POPGEN/elldemo/test4: -------------------------------------------------------------------------------- 1 | Lithuanian 2 | Georgian 3 | Sardinian 4 | Sicily_EBA 5 | -------------------------------------------------------------------------------- /POPGEN/elldemo/test3: -------------------------------------------------------------------------------- 1 | Lithuanian 2 | Georgian 3 | Sardinian 4 | ##Sicily_EBA 5 | -------------------------------------------------------------------------------- /CONVERTF/example.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/EIG/HEAD/CONVERTF/example.bed -------------------------------------------------------------------------------- /EIGENSTRAT/example.geno: -------------------------------------------------------------------------------- 1 | 11100 2 | 01212 3 | 21101 4 | 00122 5 | 21100 6 | 00111 7 | 22110 8 | -------------------------------------------------------------------------------- /POPGEN/elldemo/aaa.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/EIG/HEAD/POPGEN/elldemo/aaa.pdf -------------------------------------------------------------------------------- /POPGEN/example.geno: -------------------------------------------------------------------------------- 1 | 11100 2 | 01212 3 | 21101 4 | 00122 5 | 21100 6 | 00111 7 | 22110 8 | -------------------------------------------------------------------------------- /POPGEN/grmjunk.id: -------------------------------------------------------------------------------- 1 | NA SAMPLE0 2 | NA SAMPLE1 3 | NA SAMPLE2 4 | NA SAMPLE3 5 | NA SAMPLE4 6 | -------------------------------------------------------------------------------- /POPGEN/lsqproject.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/EIG/HEAD/POPGEN/lsqproject.pdf -------------------------------------------------------------------------------- /POPGEN/example.plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/EIG/HEAD/POPGEN/example.plot.pdf -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | baseprog 2 | convertf 3 | mergeit 4 | pca 5 | smshrink 6 | smartpca 7 | testpca 8 | -------------------------------------------------------------------------------- /CONVERTF/example.eigenstratgeno: -------------------------------------------------------------------------------- 1 | 11100 2 | 01212 3 | 21101 4 | 00122 5 | 21100 6 | 00111 7 | 22110 8 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/EIG/HEAD/EIGENSTRAT/example.plot.pdf -------------------------------------------------------------------------------- /EIGENSTRAT/example.eval: -------------------------------------------------------------------------------- 1 | 3.145365 2 | 0.479177 3 | 0.279889 4 | 0.095570 5 | -0.000000 6 | -------------------------------------------------------------------------------- /POPGEN/example.eval: -------------------------------------------------------------------------------- 1 | 3.145365 2 | 0.479177 3 | 0.279889 4 | 0.095570 5 | -0.000000 6 | -------------------------------------------------------------------------------- /CONVERTF/example.packedancestrymap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/EIG/HEAD/CONVERTF/example.packedancestrymap -------------------------------------------------------------------------------- /CONVERTF/example.packedancestrymapgeno: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/EIG/HEAD/CONVERTF/example.packedancestrymapgeno -------------------------------------------------------------------------------- /src/eigensrc/.gitignore: -------------------------------------------------------------------------------- 1 | eigenstrat 2 | eigenstratQTL 3 | pcatoy 4 | smarteigenstrat 5 | smartpca 6 | smartrel 7 | twstats 8 | -------------------------------------------------------------------------------- /POPGEN/elldemo/sicaa.phyl: -------------------------------------------------------------------------------- 1 | 3 2 | Lithuanian 0.000 0.020 0.021 3 | Georgian 0.020 0.000 0.018 4 | Sardinian 0.021 0.018 0.000 5 | -------------------------------------------------------------------------------- /POPGEN/example.pca: -------------------------------------------------------------------------------- 1 | 2 2 | 3.1450 3 | 0.4790 4 | 0.6502 0.0875 5 | 0.3589 0.0804 6 | -0.0781 -0.6017 7 | -0.3982 0.7316 8 | -0.5328 -0.2977 9 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.pca: -------------------------------------------------------------------------------- 1 | 2 2 | 3.1450 3 | 0.4790 4 | 0.6502 0.0875 5 | 0.3589 0.0804 6 | -0.0781 -0.6017 7 | -0.3982 0.7316 8 | -0.5328 -0.2977 9 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.chisq: -------------------------------------------------------------------------------- 1 | Chisq EIGENSTRAT 2 | 2.2222 0.9615 3 | 2.9167 0.2219 4 | 2.0833 0.0612 5 | 4.1667 0.1258 6 | 2.9167 2.0997 7 | 5.0000 3.0000 8 | 3.8095 0.0764 9 | -------------------------------------------------------------------------------- /include/globals.h: -------------------------------------------------------------------------------- 1 | #ifndef _GLOBALS_ 2 | #define _GLOBALS_ 3 | 4 | int numchrom = 22; 5 | int fancynorm = YES, verbose = NO, plotmode = NO, outnum = -1; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /include/packit.h: -------------------------------------------------------------------------------- 1 | int packmode = NO; 2 | unsigned char *packgenos = NULL; 3 | long packlen = 0; 4 | long rlen = -1; 5 | int rdismode = NO; 6 | unsigned char *packepath; 7 | -------------------------------------------------------------------------------- /include/smartsubs.h: -------------------------------------------------------------------------------- 1 | void setoutliermode (int mode); 2 | int 3 | ridoutlier (double *evecs, int n, int neigs, 4 | double thresh, int *badlist, OUTLINFO ** outinfo); 5 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.chisq: -------------------------------------------------------------------------------- 1 | Chisq EIGENSTRAT 2 | 2.2222 0.9615 3 | 2.9167 0.2219 4 | 2.0833 0.0612 5 | 4.1667 0.1258 6 | 2.9167 2.0997 7 | 5.0000 3.0000 8 | 3.8095 0.0764 9 | -------------------------------------------------------------------------------- /CONVERTF/example.pedind: -------------------------------------------------------------------------------- 1 | 1 SAMPLE0 0 0 2 2 2 | 2 SAMPLE1 0 0 1 2 3 | 3 SAMPLE2 0 0 2 1 4 | 4 SAMPLE3 0 0 1 1 5 | 5 SAMPLE4 0 0 2 1 6 | -------------------------------------------------------------------------------- /bin/.gitignore: -------------------------------------------------------------------------------- 1 | baseprog 2 | convertf 3 | eigenstrat 4 | eigenstratQTL 5 | mergeit 6 | pca 7 | pcatoy 8 | smarteigenstrat 9 | smartpca 10 | smartrel 11 | smshrink 12 | twstats 13 | -------------------------------------------------------------------------------- /POPGEN/elldemo/doplot: -------------------------------------------------------------------------------- 1 | #!/usr//bin/perl -w 2 | 3 | system "ploteig -i sicaa.evec -c 1:2 -g aaa -p test4 -x -k -r ctable -e ell4aa.out -t \" Sicily EBA projected conf: 0.95\"" ; 4 | 5 | -------------------------------------------------------------------------------- /include/nicklib.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | -------------------------------------------------------------------------------- /CONVERTF/example.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /POPGEN/example.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.chisq.GC: -------------------------------------------------------------------------------- 1 | Chisq EIGENSTRAT 2 | lambda=6.396 lambda=1.000 3 | 0.3475 0.9615 4 | 0.4560 0.2219 5 | 0.3257 0.0612 6 | 0.6515 0.1258 7 | 0.4560 2.0997 8 | 0.7818 3.0000 9 | 0.5956 0.0764 10 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.chisq.GC: -------------------------------------------------------------------------------- 1 | Chisq EIGENSTRAT 2 | lambda=6.396 lambda=1.000 3 | 0.3475 0.9615 4 | 0.4560 0.2219 5 | 0.3257 0.0612 6 | 0.6515 0.1258 7 | 0.4560 2.0997 8 | 0.7818 3.0000 9 | 0.5956 0.0764 10 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F 0.999999 2 | SAMPLE1 M 1.000001 3 | SAMPLE2 F -0.000001 4 | SAMPLE3 M 0.000001 5 | SAMPLE4 F -0.000001 6 | -------------------------------------------------------------------------------- /CONVERTF/example.ind.packedancestrymap: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.log: -------------------------------------------------------------------------------- 1 | parameter file: example.chisq.par 2 | genotypename: example.geno 3 | snpname: example.snp 4 | indivname: example.ind 5 | pcaname: example.pca 6 | outputname: example.chisq 7 | numpc: 1 8 | qtmode: NO 9 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.chisq.par: -------------------------------------------------------------------------------- 1 | genotypename: example.geno 2 | snpname: example.snp 3 | indivname: example.ind 4 | pcaname: example.pca 5 | outputname: example.chisq 6 | numpc: 1 7 | qtmode: NO 8 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.chisq.par: -------------------------------------------------------------------------------- 1 | genotypename: example.geno 2 | snpname: example.snp 3 | indivname: example.QTL.ind 4 | pcaname: example.pca 5 | outputname: example.QTL.chisq 6 | numpc: 1 7 | qtmode: YES 8 | -------------------------------------------------------------------------------- /POPGEN/twexample.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $command = "../bin/twstats"; 4 | $command .= " -t twtable "; 5 | $command .= " -i twexample.eval "; 6 | $command .= " -o twexample.out"; 7 | print("$command\n"); 8 | system("$command"); 9 | 10 | -------------------------------------------------------------------------------- /CONVERTF/par.EIGENSTRAT.PED: -------------------------------------------------------------------------------- 1 | genotypename: example.eigenstratgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: PED 5 | genotypeoutname: example.ped 6 | snpoutname: example.pedsnp 7 | indivoutname: example.pedind 8 | -------------------------------------------------------------------------------- /include/gval.h: -------------------------------------------------------------------------------- 1 | void setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs, 2 | int *xindex, int *xtypes, int ncols); 3 | void unsetgval (); 4 | int getgval (int row, int col, double *val); 5 | int getggval (int indindx, int col, double *val); 6 | -------------------------------------------------------------------------------- /POPGEN/example.plot2.xtxt: -------------------------------------------------------------------------------- 1 | set terminal postscript color 2 | set title "" 3 | set xlabel "eigenvector 1" 4 | set ylabel "eigenvector 2" 5 | plot "example.evec.1:2:Case" using 2:3 title "Case" , \ 6 | "example.evec.1:2:Control" using 2:3 title "Control" 7 | ## pause 9999 8 | -------------------------------------------------------------------------------- /CONVERTF/par.ANCESTRYMAP.EIGENSTRAT: -------------------------------------------------------------------------------- 1 | genotypename: example.ancestrymapgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: EIGENSTRAT 5 | genotypeoutname: example.eigenstratgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.plot.xtxt: -------------------------------------------------------------------------------- 1 | set terminal postscript color 2 | set title "" 3 | set xlabel "eigenvector 1" 4 | set ylabel "eigenvector 2" 5 | plot "example.pca.evec.1:2:Case" using 2:3 title "Case" , \ 6 | "example.pca.evec.1:2:Control" using 2:3 title "Control" 7 | ## pause 9999 8 | -------------------------------------------------------------------------------- /CONVERTF/par.PACKEDANCESTRYMAP.ANCESTRYMAP: -------------------------------------------------------------------------------- 1 | genotypename: example.packedancestrymapgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: ANCESTRYMAP 5 | genotypeoutname: example.ancestrymapgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /POPGEN/example.plot.xtxt: -------------------------------------------------------------------------------- 1 | set terminal postscript color 2 | set title "" 3 | set key outside 4 | set xlabel "eigenvector 1" 5 | set ylabel "eigenvector 2" 6 | plot "example.evec.1:2:Case" using 2:3 title "Case" , \ 7 | "example.evec.1:2:Control" using 2:3 title "Control" 8 | ## pause 9999 9 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.pca.par: -------------------------------------------------------------------------------- 1 | genotypename: example.geno 2 | snpname: example.snp 3 | indivname: example.ind 4 | evecoutname: example.pca.evec 5 | evaloutname: example.eval 6 | altnormstyle: NO 7 | numoutevec: 2 8 | numoutlieriter: 5 9 | numoutlierevec: 2 10 | outliersigmathresh: 6.0 11 | qtmode: 0 12 | -------------------------------------------------------------------------------- /POPGEN/grmjunk: -------------------------------------------------------------------------------- 1 | 1 1 7 1.789380 2 | 2 1 7 0.789216 3 | 2 2 7 0.670580 4 | 3 1 7 -0.286950 5 | 3 2 7 -0.108997 6 | 3 3 7 0.317350 7 | 4 1 7 -0.993409 8 | 4 2 7 -0.527105 9 | 4 3 7 -0.108997 10 | 4 4 7 0.958931 11 | 5 1 7 -1.298237 12 | 5 2 7 -0.823694 13 | 5 3 7 0.187593 14 | 5 4 7 0.670580 15 | 5 5 7 1.263758 16 | -------------------------------------------------------------------------------- /src/nicksrc/Makefile: -------------------------------------------------------------------------------- 1 | override CFLAGS += -c -O3 -g -p -Wimplicit -I../../include 2 | 3 | all: libnick.a 4 | 5 | libnick.a: strsubs.o sortit.o vsubs.o statsubs.o linsubs.o getpars.o xsearch.o gauss.o gds.o 6 | ar -r libnick.a $^ 7 | ranlib libnick.a 8 | 9 | clean: 10 | rm -f *.o 11 | rm -f libnick.a 12 | -------------------------------------------------------------------------------- /POPGEN/par.example: -------------------------------------------------------------------------------- 1 | genotypename: ../CONVERTF/example.ped 2 | snpname: ../CONVERTF/example.map 3 | indivname: ../CONVERTF/example.ped 4 | evecoutname: example.evec 5 | evaloutname: example.eval 6 | altnormstyle: NO 7 | numoutevec: 2 8 | familynames: NO 9 | grmoutname: grmjunk 10 | -------------------------------------------------------------------------------- /src/ksrc/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS += -I../../include -I/opt/openblas/include 2 | 3 | ifeq ($(DEBUG), 1) 4 | CFLAGS += -g # enable debugging 5 | endif 6 | 7 | ifeq ($(PROFILING), 1) 8 | CFLAGS += -pg # enable profiling 9 | endif 10 | 11 | .PHONY: all clean 12 | 13 | all: kjg_gsl.o kjg_fpca.o 14 | 15 | clean: 16 | rm *.o 17 | -------------------------------------------------------------------------------- /CONVERTF/example.map: -------------------------------------------------------------------------------- 1 | 11 rs0000 0.000000 0 2 | 11 rs1111 0.001000 100000 3 | 11 rs2222 0.002000 200000 4 | 11 rs3333 0.003000 300000 5 | 11 rs4444 0.004000 400000 6 | 11 rs5555 0.005000 500000 7 | 11 rs6666 0.006000 600000 8 | -------------------------------------------------------------------------------- /CONVERTF/par.PED.PACKEDPED: -------------------------------------------------------------------------------- 1 | genotypename: example.ped 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or example.ped, either works 4 | outputformat: PACKEDPED 5 | genotypeoutname: example.bed 6 | snpoutname: example.pedsnp 7 | indivoutname: example.pedind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /CONVERTF/example.ped: -------------------------------------------------------------------------------- 1 | 1 SAMPLE0 0 0 2 2 1 2 3 3 1 1 1 1 3 3 1 1 3 3 2 | 2 SAMPLE1 0 0 1 2 1 2 1 3 1 4 1 1 1 3 1 1 3 3 3 | 3 SAMPLE2 0 0 2 1 1 2 1 1 1 4 1 2 1 3 1 4 3 4 4 | 4 SAMPLE3 0 0 1 1 2 2 1 3 4 4 2 2 1 1 1 4 3 4 5 | 5 SAMPLE4 0 0 2 1 2 2 1 1 1 4 2 2 1 1 1 4 4 4 6 | -------------------------------------------------------------------------------- /CONVERTF/par.PED.EIGENSTRAT: -------------------------------------------------------------------------------- 1 | genotypename: example.ped 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or example.ped, either works 4 | outputformat: EIGENSTRAT 5 | genotypeoutname: example.eigenstratgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /CONVERTF/example.pedsnp: -------------------------------------------------------------------------------- 1 | 11 rs0000 0.000000 0 A C 2 | 11 rs1111 0.001000 100000 A G 3 | 11 rs2222 0.002000 200000 A T 4 | 11 rs3333 0.003000 300000 C A 5 | 11 rs4444 0.004000 400000 G A 6 | 11 rs5555 0.005000 500000 T A 7 | 11 rs6666 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /POPGEN/elldemo/aaa:Sicily_EBA: -------------------------------------------------------------------------------- 1 | I3122 -0.094106 -0.019738 Sicily_EBA 2 | I11442 -0.061346 -0.019553 Sicily_EBA 3 | I7796 -0.046874 -0.045908 Sicily_EBA 4 | I7800 -0.066847 -0.032000 Sicily_EBA 5 | I7807 -0.073119 -0.032957 Sicily_EBA 6 | -------------------------------------------------------------------------------- /CONVERTF/par.PACKEDPED.PACKEDANCESTRYMAP: -------------------------------------------------------------------------------- 1 | genotypename: example.bed 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or example.ped, either works 4 | outputformat: PACKEDANCESTRYMAP 5 | genotypeoutname: example.packedancestrymapgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /POPGEN/example.evec: -------------------------------------------------------------------------------- 1 | #eigvals: 3.145 0.479 2 | SAMPLE0 0.6502 0.0875 Case 3 | SAMPLE1 0.3589 0.0804 Case 4 | SAMPLE2 -0.0781 -0.6017 Control 5 | SAMPLE3 -0.3982 0.7316 Control 6 | SAMPLE4 -0.5328 -0.2977 Control 7 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.pca.evec: -------------------------------------------------------------------------------- 1 | #eigvals: 3.145 0.479 2 | SAMPLE0 0.6502 0.0875 Case 3 | SAMPLE1 0.3589 0.0804 Case 4 | SAMPLE2 -0.0781 -0.6017 Control 5 | SAMPLE3 -0.3982 0.7316 Control 6 | SAMPLE4 -0.5328 -0.2977 Control 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | -------------------------------------------------------------------------------- /CONVERTF/example.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # $parfile = "par.ANCESTRYMAP.EIGENSTRAT"; 4 | $parfile = "par.EIGENSTRAT.PED"; 5 | # $parfile = "par.PED.EIGENSTRAT"; 6 | # $parfile = "par.PED.PACKEDPED"; 7 | # $parfile = "par.PACKEDPED.PACKEDANCESTRYMAP"; 8 | # $parfile = "par.PACKEDANCESTRYMAP.ANCESTRYMAP"; 9 | 10 | system("../bin/convertf -p $parfile"); 11 | -------------------------------------------------------------------------------- /POPGEN/example.snp: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /CONVERTF/example.snp: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.snp: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /include/egsubs.h: -------------------------------------------------------------------------------- 1 | #include "admutils.h" 2 | 3 | 4 | int makeeglist (char **eglist, int maxnumeg, Indiv ** indivmarkers, 5 | int numindivs); 6 | int mkeglist (Indiv ** indm, int numindivs, char **eglist); 7 | void seteglist (Indiv ** indm, int nindiv, char *eglistname); 8 | void seteglistv (Indiv ** indm, int nindiv, char *eglistname, int val); 9 | int loadlist (char **list, char *listname); 10 | int loadlist_type (char **list, char *listname, int *ztypes, int off); 11 | -------------------------------------------------------------------------------- /CONVERTF/example.snp.packedancestrymap: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /CONVERTF/ind2pheno.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $in = $ARGV[0]; # .ind file 4 | $out = $ARGV[1]; # .pheno file 5 | 6 | open(IN,$in) || die("COF"); 7 | open(OUT,">$out") || die("COF"); 8 | 9 | while($line = ) 10 | { 11 | if($line =~ /Case/) { print OUT ("1"); $case=1; } 12 | elsif($line =~ /Control/) { print OUT ("0"); $control=1; } 13 | else { print OUT ("9"); } 14 | } 15 | print OUT ("\n"); 16 | unless($case) { print("WARNING: no cases\n"); } 17 | unless($control) { print("WARNING: no controls\n"); } 18 | 19 | -------------------------------------------------------------------------------- /src/gval.h: -------------------------------------------------------------------------------- 1 | void setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs, 2 | int *xindex, int *xtypes, int ncols); 3 | void unsetgval (); 4 | int getgval (int row, int col, double *val); 5 | int getggval (int indindx, int col, double *val); 6 | 7 | void set_ind_mask (); 8 | 9 | size_t get_nrows (); 10 | size_t get_ncols (); 11 | 12 | void kjg_geno_get_normalized_row (const size_t snp_index, double *y); 13 | size_t kjg_geno_get_normalized_rows (const size_t i, const size_t r, 14 | double *Y); 15 | -------------------------------------------------------------------------------- /POPGEN/elldemo/par1: -------------------------------------------------------------------------------- 1 | DIR: /home/np29/broaddatax/v37/ 2 | ##DIR: . 3 | S1: test4 4 | indivname: DIR/S1.ind 5 | snpname: DIR/S1.snp 6 | genotypename: DIR/S1.geno 7 | numoutliter: 0 8 | numoutevec: 2 9 | evecoutname: sicaa.evec 10 | phylipname: sicaa.phyl 11 | poplistname: test3 12 | inbreed: NO 13 | lsqproject: YES 14 | shrinkmode: YES 15 | hiprec: YES 16 | elllistname: ebalist 17 | elloutname: ell4aa.out 18 | ellconf: 0.95 19 | -------------------------------------------------------------------------------- /include/eigsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void eigvals (double *mat, double *evals, int n); 7 | void eigvecs (double *mat, double *evals, double *evecs, int n); 8 | void mkorth(double *orth, double *ww, int n) ; 9 | void eigb (double *lam, double *a, double *b, int n); 10 | void eigc (double *lam, double *a, double *b, int n); 11 | double twestxx (double *lam, int m, double *pzn, double *pzvar); 12 | 13 | typedef struct 14 | { 15 | int vecno; 16 | double score; 17 | } OUTLINFO;; 18 | -------------------------------------------------------------------------------- /include/badpairs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "admutils.h" 3 | 4 | void dobadpairs (char *badpairsname, SNP ** snpm, int numsnps); 5 | void dogood (char *goodsnpname, SNP ** snpm, int numsnps); 6 | void getsnpsc (char *snpscname, SNP ** snpm, int numsnps); 7 | int killsnps (Indiv ** indivmarkers, SNP ** snpmarkers, int numsnps, 8 | int mincasenum); 9 | void loadbadpsc (SNP ** snpm, int numsnps, int rmode, char *gname); 10 | 11 | double entrop (double *a, int n); 12 | double xxlog2 (double t); 13 | double mutx (double *dd); 14 | double mutxx (double *dd, int m, int n); 15 | -------------------------------------------------------------------------------- /POPGEN/example.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 4 | 5 | $command = "smartpca"; 6 | $command .= " -p par.example >example.log"; 7 | print("$command\n"); 8 | system("$command"); 9 | 10 | $command = "ploteig"; 11 | $command .= " -i example.evec "; 12 | $command .= " -c 1:2 "; 13 | $command .= " -p Case:Control "; 14 | $command .= " -x "; 15 | $command .= " -o example.plot.xtxt "; # must end in .xtxt 16 | print("$command\n"); 17 | system("$command"); 18 | 19 | $command = "evec2pca.perl 2 example.evec example.ind example.pca"; 20 | print("$command\n"); 21 | system("$command"); 22 | -------------------------------------------------------------------------------- /include/exclude.h: -------------------------------------------------------------------------------- 1 | #ifndef _EXCLUDE_ 2 | #define _EXCLUDE_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | /* file name parameter : xregionname */ 9 | /* HW filter parameter : nhwfilter (-1 means no-filter) */ 10 | /* maximum number of regions : 1000 11 | closed intervals in physical position include endpoints */ 12 | /* read file and set ignore flag for SNPs */ 13 | void excluderegions (char *xregionname, SNP ** snps, int nsnps, 14 | char *deletesnpoutname); 15 | void hwfilter (SNP ** snps, int nsnps, int nindiv, double nhwfilter, 16 | char *deletesnpoutname); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/README: -------------------------------------------------------------------------------- 1 | Partial list of Authors of eigensoft 2 | 3 | Nick Patterson 4 | Alkes Price 5 | Sam Pollack 6 | Sasha Gusev 7 | Chris Chang 8 | Kevin Galinsky 9 | 10 | ======================================================== 11 | fastmode: YES 12 | optional parameters 13 | fastdim: (default 2 * numeigs) 14 | fastiter: (default numeigs) 15 | 16 | In fastmode 17 | A .evec and .eval file wiil be produced on request but: 18 | 19 | NO outlier removal 20 | NO Tracy-Widom significance testing 21 | NO lsqproject mode 22 | NO F_st. (Use fstonly: YES) if this is wanted. 23 | 24 | If fastmode and easymode set, NO projection 25 | 26 | -------------------------------------------------------------------------------- /POPGEN/elldemo/aaa:Lithuanian: -------------------------------------------------------------------------------- 1 | LithuanianF1 0.030379 0.322035 Lithuanian 2 | lithuania3 0.031552 0.282867 Lithuanian 3 | lithuania10 0.023618 0.342455 Lithuanian 4 | lithuania9 0.032949 0.338347 Lithuanian 5 | LithuanianA1 0.023348 0.316050 Lithuanian 6 | LithuanianE2 0.031801 0.273176 Lithuanian 7 | lithuania1 0.015194 0.254946 Lithuanian 8 | lithuania8 0.030478 0.305058 Lithuanian 9 | lithuania2 0.032994 0.334543 Lithuanian 10 | LithuanianD1 0.016299 0.282471 Lithuanian 11 | -------------------------------------------------------------------------------- /src/nicksrc/gauss.c: -------------------------------------------------------------------------------- 1 | #include "ranmath.h" 2 | 3 | double 4 | gauss () 5 | { 6 | 7 | /** 8 | Numer alg. in C pp 289 ff 9 | */ 10 | 11 | static int iset = 0; 12 | static double gset; 13 | double v1, v2, rsq, fac; 14 | 15 | if (iset == 1) { 16 | iset = 0; 17 | return gset; 18 | } 19 | 20 | do { 21 | v1 = 2.0 * DRAND2 () - 1.0; 22 | v2 = 2.0 * DRAND2 () - 1.0; 23 | rsq = v1 * v1 + v2 * v2; 24 | } while (rsq >= 1.0 || rsq == 0.0); 25 | 26 | fac = sqrt (-2.0 * log (rsq) / rsq); 27 | gset = v1 * fac; 28 | iset = 1; 29 | return v2 * fac; 30 | 31 | } 32 | 33 | void 34 | gaussa (double *a, int n) 35 | { 36 | int i; 37 | for (i = 0; i < n; i++) 38 | a[i] = gauss (); 39 | 40 | } 41 | -------------------------------------------------------------------------------- /include/workqueue.h: -------------------------------------------------------------------------------- 1 | #ifndef WORKQUEUE_H 2 | #define WORKQUEUE_H 3 | 4 | #ifdef HAVE_PTHREAD 5 | #include 6 | #endif 7 | 8 | typedef struct work_task_t 9 | { 10 | #ifdef HAVE_PTHREAD 11 | pthread_t thread; 12 | #endif 13 | void *(*start_routine) (void *); 14 | void *argument; 15 | } work_task; 16 | 17 | typedef struct work_queue_t 18 | { 19 | // Which tasks are pending? 20 | work_task *tasks; 21 | int num_tasks; 22 | } work_queue; 23 | 24 | void create_work_queue (work_queue ** the_queue); 25 | void destroy_work_queue (work_queue ** the_queue); 26 | void queue_task (work_queue * queue, const work_task * task); 27 | void wait_for_queue_to_complete (const work_queue * queue); 28 | 29 | #endif // WORKQUEUE_H 30 | -------------------------------------------------------------------------------- /include/ldsubs.h: -------------------------------------------------------------------------------- 1 | 2 | typedef struct 3 | { 4 | double S0; 5 | double S1; 6 | double S2; 7 | double S11; 8 | double S12; 9 | double S22; 10 | double m1; 11 | double m2; 12 | double v11; 13 | double v12; 14 | double v22; 15 | double corr; 16 | double Z; 17 | } CORR; 18 | 19 | int calccorr (CORR * corrpt, int mode, int ztrans); 20 | void printcorr (CORR * corrpt); 21 | void clearcorr (CORR * corrpt); 22 | void addcorr (CORR * corrpt, double x1, double x2); 23 | void addcorrn (CORR * corrpt, double x1, double x2, double yn); 24 | void minuscorr (CORR * out, CORR * c1, CORR * c2); 25 | 26 | double lddip (double *xc); 27 | double zdip (double *xc); 28 | void setzdipmode (int mode); 29 | void setzdphasedmode (int mode); 30 | -------------------------------------------------------------------------------- /include/xsearch.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int xfindit(char *ss) ; 4 | int xloadsearchx(char **ss, int n) ; 5 | int finddup(char **ss, int n) ; 6 | void xloadsearch(char **ss, int n) ; 7 | void xdestroy() ; 8 | 9 | void xhcreate (int n) ; 10 | void xhdestroy() ; 11 | ENTRY *xhsearch(ENTRY item, ACTION act) ; 12 | 13 | int xlookup(char *key, ACTION act) ; 14 | int xhash (char *key) ; 15 | int xhash1(int ww) ; 16 | int xhash2 (int x) ; 17 | int xcshift(int x, int shft) ; 18 | int stringhash(char *key) ; 19 | void setfancyhash(int val) ; 20 | long xlhash (long x) ; 21 | int xshash (int x) ; 22 | int fnv_hash(char *strng) ; 23 | 24 | #define FNV_PRIME 0x01000193 25 | #define FNV_OFFSET_BASIS 0x811c9dc5 26 | 27 | -------------------------------------------------------------------------------- /include/sortit.h: -------------------------------------------------------------------------------- 1 | 2 | void sortit(double *a, int *ind, int len) ; 3 | double median (double *a, int n) ; 4 | int compit (int *a1, int *a2) ; 5 | void isortit(int *a, int *ind, int len) ; 6 | void lsortit(long *a, int *ind, int len) ; 7 | int icompit (int *a1, int *a2) ; 8 | int lcompit (int *a1, int *a2) ; 9 | void invperm(int *a, int *b, int n) ; 10 | int ipcompit (int *a1, int *a2) ; 11 | int comparr(double *a, double *b, int len) ; 12 | int compiarr(int *a, int *b, int len) ; 13 | int complarr(long *a, long *b, int len) ; 14 | void ipsortit(int **a, int *ind, int len, int rlen) ; 15 | void ipsortitp(int **a, int *ind, int len, int rlen, int *pp) ; 16 | void setorder (int *pp, int rlen) ; 17 | void mkirank(int *rank, int *xin, int n) ; 18 | void mkrank(int *rank, double *xin, int n) ; 19 | 20 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.oldstyle.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 4 | # MUST put pca bin directory in path for smartpca.perl to work 5 | 6 | $command = "pca"; 7 | $command .= " -i example.geno "; 8 | $command .= " -o example.pca "; 9 | $command .= " -e example.eval "; 10 | $command .= " -l example.log "; 11 | $command .= " -k 2 "; 12 | $command .= " -m 5 "; 13 | $command .= " -t 2 "; 14 | $command .= " -s 6.0 "; 15 | print("$command\n"); 16 | system("$command"); 17 | 18 | $command = "eigenstrat"; # or eigenstrat.big.perl for large data sets 19 | $command .= " -i example.geno "; 20 | $command .= " -j example.pheno "; 21 | $command .= " -p example.pca "; 22 | $command .= " -l 1 "; 23 | $command .= " -o example.chisq "; 24 | print("$command\n"); 25 | system("$command"); 26 | 27 | $command = "gc.perl example.chisq example.chisq.GC"; 28 | print("$command\n"); 29 | system("$command"); 30 | -------------------------------------------------------------------------------- /include/getpars.h: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | int numpars ; 3 | FILE *fx ; 4 | char **ppars ; 5 | char **pdata ; 6 | } phandle ; 7 | 8 | void writepars(phandle *pp) ; 9 | void closepars(phandle *pp) ; 10 | phandle *openpars(char *fname) ; 11 | 12 | int getlongstring(phandle *pp, char *parname, char **kret) ; 13 | // whole of line 14 | int getstring(phandle *pp, char *parname, char **kret) ; 15 | int getint(phandle *pp, char *parname, int *kret) ; 16 | int getints(phandle *pp, char *parname, int *aint, int nint) ; 17 | int getintss(phandle *pp, char *parname, int *aint, int *xint) ; 18 | 19 | int getdbl(phandle *pp, char *parname, double *dbl) ; 20 | int getdbls(phandle *pp, char *parname, double *dbl, int ndbl) ; 21 | int getdblss(phandle *pp, char *parname, double *dbl, int *ndbl) ; 22 | int subst(char *outstr, char *instr, char *ins, char *outs) ; 23 | void dostrsub(phandle *pp) ; 24 | int upstring (char *ss) ; 25 | void subcolon(char *ss) ; 26 | -------------------------------------------------------------------------------- /include/regsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | double regressit (double *ans, double *eq, double *rhs, int m, int n); 8 | void regressitall (char **vname, double *eq, double *rhs, int m, int n); 9 | void add1 (int *a, int *b, int n); 10 | 11 | void ptoz (double *p, double *z, int n); 12 | void ztop (double *p, double *z, int n); 13 | double logregressit (double *ans, double *eq, double **rhs, int neq, int nv); 14 | double logrscore (double *eq, double **rhs, int neq, int nv); 15 | 16 | void calcgh (double *grad, double *hess, double *eq, double *z, 17 | double *n0, double *n1, int neq, int nv); 18 | 19 | double zlike (double *eq, double *n0, double *n1, 20 | double *ans, int neq, int nv); 21 | 22 | void squish (double *xmat, double *mat, int nrow, int oldc, int newc); 23 | 24 | void 25 | calcres (double *res, double *ans, double *eq, double *rhs, int neq, int nv); 26 | -------------------------------------------------------------------------------- /include/mcmcpars.h: -------------------------------------------------------------------------------- 1 | 2 | double thp1 = 1.0, thp2 = 5.0; /* params for theta */ 3 | double thxp1 = 1.0, thxp2 = 10.0; /* params for theta X */ 4 | double thxp0 = 40; /* cross term */ 5 | /* -1 is default value (=0 logically) */ 6 | 7 | double lp1 = 10.2, lp2 = 2; 8 | double lxp1 = 10.2, lxp2 = 2; 9 | 10 | double priorlmean = 6; 11 | double priorlmsig = 5; 12 | /* hyperprior on mean, s.dev for gamma prior on lambda */ 13 | 14 | double qtrbase = 0.0; 15 | 16 | double loclip = -20.0; 17 | double hiclip = 15.0; 18 | 19 | double a1 = 2, b1 = 8; 20 | double aa2 = 2, bb2 = 14, cc2 = 85; 21 | double p1 = 18, psi1 = 3; 22 | /* for toys */ 23 | 24 | double muval = 0.0, tmumean = 0.2, muval1; 25 | /* for bridge sampler */ 26 | 27 | int pubxindiv = -1; 28 | int alkesmode = NO; 29 | int malexhet = NO; 30 | int familynames = YES; 31 | 32 | int decim = 0, dmindis = 200000, dmaxdis = 500000; // decimation parameters 33 | int hashcheck = YES; 34 | int outputall = NO; 35 | int sevencolumnped = NO; 36 | 37 | FILE *fstdetails = NULL; 38 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 4 | # MUST put smartpca bin directory in path for smartpca.perl to work 5 | 6 | $command = "smartpca.perl"; 7 | $command .= " -i example.geno "; 8 | $command .= " -a example.snp "; 9 | $command .= " -b example.ind " ; 10 | $command .= " -k 2 "; 11 | $command .= " -o example.pca "; 12 | $command .= " -p example.plot "; 13 | $command .= " -e example.eval "; 14 | $command .= " -l example.log "; 15 | $command .= " -m 5 "; 16 | $command .= " -t 2 "; 17 | $command .= " -s 6.0 "; 18 | print("$command\n"); 19 | system("$command"); 20 | 21 | $command = "smarteigenstrat.perl "; 22 | $command .= " -i example.geno "; 23 | $command .= " -a example.snp "; 24 | $command .= " -b example.ind "; 25 | $command .= " -p example.pca "; 26 | $command .= " -k 1 "; 27 | $command .= " -o example.chisq "; 28 | $command .= " -l example.log "; 29 | print("$command\n"); 30 | system("$command"); 31 | 32 | $command = "gc.perl example.chisq example.chisq.GC"; 33 | print("$command\n"); 34 | system("$command"); 35 | -------------------------------------------------------------------------------- /include/xpsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "admutils.h" 4 | 5 | extern int verbose; 6 | 7 | double xpest (double **gg, int *gobs, int *na, int *nb, 8 | int neq, double *ppa, double *ppb); 9 | 10 | void mk2from3ml (double *xd, double *xc, double p, double pp); 11 | void mk2from2 (double *xd, double *xc); 12 | 13 | void loadpprob (double *pprob, double pa, double pb); 14 | void gen3 (double *ww, double a, double b); 15 | 16 | double xpest2like (double **gg, int *gobs, int *na, int *nb, 17 | int *iscasearr, 18 | int neq, double ppa, double ppb, double risk); 19 | 20 | 21 | // clean up SANS when finalized 22 | 23 | typedef struct 24 | { 25 | SNP *cupt; 26 | int numsamps; 27 | double admbayessc; 28 | double *baymodelsc; 29 | double admfsc; 30 | double admzscore; 31 | double admbsc; 32 | double admfccsc; 33 | double simpsc[2]; 34 | double admyl[3]; 35 | double lrmax; 36 | double lrsig; 37 | double maxlod; 38 | /* now start of fine-mapping scores */ 39 | double gscore; 40 | double gcheck; 41 | double gbayes; 42 | } SANS; 43 | -------------------------------------------------------------------------------- /include/not-thread-h: -------------------------------------------------------------------------------- 1 | #ifndef _THREAD_ 2 | #define _THREAD_ 3 | 4 | #include 5 | 6 | // globals defined in smartpca.c 7 | extern int ldregress; 8 | extern int minallelecnt; 9 | extern int maxmissing; 10 | extern int numthreads; 11 | extern double ldlimit; 12 | 13 | extern pthread_mutex_t mutex_xtx; 14 | extern pthread_mutex_t mutex_nkill; 15 | 16 | 17 | typedef struct thread_args { 18 | 19 | int mythreadnum; 20 | 21 | SNP **xsnplist; 22 | double *xmean; 23 | double *xfancy; 24 | int *xindex; 25 | int *xtypes; 26 | 27 | int numindivs; 28 | int nrows; 29 | int ncols; 30 | int blocksize; 31 | int weightmode; 32 | 33 | int nkill; // out 34 | int nused; // out 35 | 36 | double *XTX; 37 | 38 | } thread_args_t; 39 | 40 | 41 | void *thread_function(void *args); 42 | 43 | thread_args_t *pack_args(int mythreadnum, SNP **xsnplist, double *xmean, double *xfancy, int *xindex, int *xtypes, double *XTX, 44 | int numindivs, int nrows, int ncols, int blocksize, int weightmode); 45 | 46 | void unpack_args(thread_args_t *ta, int *nkill, int *nused); 47 | 48 | #endif 49 | 50 | -------------------------------------------------------------------------------- /EIGENSTRAT/example.QTL.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 4 | # MUST put smartpca bin directory in path for smartpca.perl to work 5 | 6 | $command = "smartpca.perl"; 7 | $command .= " -i example.geno "; 8 | $command .= " -a example.snp "; 9 | $command .= " -b example.QTL.ind " ; 10 | $command .= " -k 2 "; 11 | $command .= " -o example.pca "; 12 | $command .= " -p example.plot "; 13 | $command .= " -e example.eval "; 14 | $command .= " -l example.log "; 15 | $command .= " -m 5 "; 16 | $command .= " -t 2 "; 17 | $command .= " -s 6.0 "; 18 | $command .= " -q YES "; 19 | print("$command\n"); 20 | system("$command"); 21 | 22 | $command = "smarteigenstrat.perl "; 23 | $command .= " -i example.geno "; 24 | $command .= " -a example.snp "; 25 | $command .= " -b example.QTL.ind "; 26 | $command .= " -p example.pca "; 27 | $command .= " -k 1 "; 28 | $command .= " -o example.QTL.chisq "; 29 | $command .= " -l example.log "; 30 | $command .= " -q YES "; 31 | print("$command\n"); 32 | system("$command"); 33 | 34 | $command = "gc.perl example.QTL.chisq example.QTL.chisq.GC"; 35 | print("$command\n"); 36 | system("$command"); 37 | -------------------------------------------------------------------------------- /src/eigensrc/pcatoy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "eigsubs.h" 5 | #include 6 | 7 | int 8 | main () 9 | { 10 | int NSAMPLES, n, k; 11 | double *eval, *evec, *XTX; 12 | 13 | NSAMPLES = 2; 14 | 15 | /* malloc */ 16 | if ((eval = (double *) malloc (NSAMPLES * sizeof (*eval))) == NULL) { 17 | fprintf (stderr, "CM\n"); 18 | exit (1); 19 | } 20 | if ((evec = 21 | (double *) malloc (NSAMPLES * NSAMPLES * sizeof (*evec))) == NULL) { 22 | fprintf (stderr, "CM\n"); 23 | exit (1); 24 | } 25 | if ((XTX = (double *) malloc (NSAMPLES * NSAMPLES * sizeof (*XTX))) == NULL) { 26 | fprintf (stderr, "CM\n"); 27 | exit (1); 28 | } 29 | 30 | XTX[0] = 1; 31 | XTX[1] = 0; 32 | XTX[2] = 0; 33 | XTX[3] = 1; /* 2x2 identity matrix */ 34 | 35 | eigvecs (XTX, eval, evec, NSAMPLES); /* eigenvector k is evec[k*NSAMPLES+n] */ 36 | 37 | /* print eval and evec */ 38 | printf ("The eigenvectors of the 2x2 identity matrix are:\n"); 39 | for (n = 0; n < NSAMPLES; n++) { 40 | for (k = 0; k < NSAMPLES; k++) { 41 | printf (" "); 42 | printf ("%.02f", evec[k * NSAMPLES + n]); 43 | } 44 | printf ("\n"); 45 | } 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /include/kjg_fpca.h: -------------------------------------------------------------------------------- 1 | /** @file kjg_fpca.h 2 | * @brief Runs fastPCA. 3 | * This module also has methods to multiply a genotype matrix against the GSL 4 | * matrices. 5 | */ 6 | 7 | #ifndef KJG_FPCA_H_ 8 | #define KJG_FPCA_H_ 9 | 10 | #include 11 | 12 | extern size_t KJG_FPCA_ROWS; // number of rows to process at once 13 | 14 | /** Performs a fast PCA 15 | * @param *eval eigenvalues 16 | * @param *evec eigenvectors 17 | * @param K number of eigenvalues/vectors 18 | * @param L width of projection matrix 19 | * @param I iterations to do exponentiation 20 | */ 21 | 22 | void kjg_fpca (size_t K, size_t L, size_t I, double *eval, double *evec); 23 | 24 | /** Multiplies B=X*A1 and A2 = XT*B = XT*X*A1 25 | * @param *A1 some matrix 26 | * @param *B intermediate matrix 27 | * @param *A2 next matrix 28 | */ 29 | 30 | void kjg_fpca_XTXA (const gsl_matrix * A1, gsl_matrix * B, gsl_matrix * A2); 31 | 32 | /** Multiplies B = X*A 33 | * @param *A some matrix 34 | * @param *B another matrix 35 | */ 36 | 37 | void kjg_fpca_XA (const gsl_matrix * A, gsl_matrix * B); 38 | 39 | /** Multiplies A = XT*B 40 | * @param *B some matrix 41 | * @param *A another matrix 42 | */ 43 | 44 | void kjg_fpca_XTB (const gsl_matrix * B, gsl_matrix * A); 45 | 46 | #endif /* KJG_FPCA_H_ */ 47 | -------------------------------------------------------------------------------- /POPGEN/elldemo/q1: -------------------------------------------------------------------------------- 1 | 0.950 ::sample: I3122 2 | -0.094106295 -0.019737790 3 | -0.094106436 -0.019739079 4 | 0.000059549 0.000003096 5 | 0.000003096 0.000145052 6 | ellcoords: -0.0941 -0.0197 0.029491478 0.018871080 -1.534647054 :: 0.950 7 | sample: I11442 8 | -0.061346222 -0.019552895 9 | -0.061343762 -0.019555191 10 | 0.000060178 0.000012115 11 | 0.000012115 0.000150393 12 | ellcoords: -0.0613 -0.0196 0.030177005 0.018734409 -1.439596300 :: 0.950 13 | sample: I7796 14 | -0.046873995 -0.045907875 15 | -0.046872172 -0.045907252 16 | 0.000123822 0.000000395 17 | 0.000000395 0.000280934 18 | ellcoords: -0.0469 -0.0459 0.041026961 0.027237240 -1.568280598 :: 0.950 19 | sample: I7800 20 | -0.066846646 -0.031999605 21 | -0.066842606 -0.031999713 22 | 0.000320876 -0.000001283 23 | -0.000001283 0.000713629 24 | ellcoords: -0.0668 -0.0320 0.065388904 0.043846220 1.567529274 :: 0.950 25 | sample: I7807 26 | -0.073119168 -0.032956599 27 | -0.073121562 -0.032957419 28 | 0.000118724 0.000007414 29 | 0.000007414 0.000275285 30 | ellcoords: -0.0731 -0.0330 0.040638127 0.026631378 -1.523582242 :: 0.950 31 | -------------------------------------------------------------------------------- /POPGEN/elldemo/q2: -------------------------------------------------------------------------------- 1 | 0.990 :: sicaa.evec 2 | sample: I3122 3 | -0.094106295 -0.019737790 4 | -0.094106436 -0.019739079 5 | 0.000091541 0.000004759 6 | 0.000004759 0.000222980 7 | ellcoords: -0.0941 -0.0197 0.036565194 0.023397427 -1.534647054 :: 0.990 8 | sample: I11442 9 | -0.061346222 -0.019552895 10 | -0.061343762 -0.019555191 11 | 0.000092508 0.000018624 12 | 0.000018624 0.000231191 13 | ellcoords: -0.0613 -0.0196 0.037415149 0.023227974 -1.439596300 :: 0.990 14 | sample: I7796 15 | -0.046873995 -0.045907875 16 | -0.046872172 -0.045907252 17 | 0.000190345 0.000000607 18 | 0.000000607 0.000431864 19 | ellcoords: -0.0469 -0.0459 0.050867535 0.033770263 -1.568280598 :: 0.990 20 | sample: I7800 21 | -0.066846646 -0.031999605 22 | -0.066842606 -0.031999713 23 | 0.000493265 -0.000001972 24 | -0.000001972 0.001097022 25 | ellcoords: -0.0668 -0.0320 0.081072843 0.054363011 1.567529274 :: 0.990 26 | sample: I7807 27 | -0.073119168 -0.032956599 28 | -0.073121562 -0.032957419 29 | 0.000182508 0.000011397 30 | 0.000011397 0.000423180 31 | ellcoords: -0.0731 -0.0330 0.050385437 0.033019081 -1.523582242 :: 0.990 32 | -------------------------------------------------------------------------------- /POPGEN/elldemo/q3: -------------------------------------------------------------------------------- 1 | 0.950 :: sicaa.evec 2 | sample: I3122 3 | -0.094106295 -0.019737790 4 | -0.094106436 -0.019739079 5 | 0.000059549 0.000003096 6 | 0.000003096 0.000145052 7 | ellcoords: -0.0941 -0.0197 0.029491478 0.018871080 -1.534647054 :: 0.950 8 | sample: I11442 9 | -0.061346222 -0.019552895 10 | -0.061343762 -0.019555191 11 | 0.000060178 0.000012115 12 | 0.000012115 0.000150393 13 | ellcoords: -0.0613 -0.0196 0.030177005 0.018734409 -1.439596300 :: 0.950 14 | sample: I7796 15 | -0.046873995 -0.045907875 16 | -0.046872172 -0.045907252 17 | 0.000123822 0.000000395 18 | 0.000000395 0.000280934 19 | ellcoords: -0.0469 -0.0459 0.041026961 0.027237240 -1.568280598 :: 0.950 20 | sample: I7800 21 | -0.066846646 -0.031999605 22 | -0.066842606 -0.031999713 23 | 0.000320876 -0.000001283 24 | -0.000001283 0.000713629 25 | ellcoords: -0.0668 -0.0320 0.065388904 0.043846220 1.567529274 :: 0.950 26 | sample: I7807 27 | -0.073119168 -0.032956599 28 | -0.073121562 -0.032957419 29 | 0.000118724 0.000007414 30 | 0.000007414 0.000275285 31 | ellcoords: -0.0731 -0.0330 0.040638127 0.026631378 -1.523582242 :: 0.950 32 | -------------------------------------------------------------------------------- /POPGEN/elldemo/aaa.xtxt: -------------------------------------------------------------------------------- 1 | set terminal postscript color noenhanced 2 | set title " Sicily EBA projected conf: 0.95" 3 | set key outside 4 | set xlabel "eigenvector 1" 5 | set ylabel "eigenvector 2" 6 | set parametric 7 | fx(a, b, phi, m0, t) = a*cos(phi)*cos(t)+b*sin(phi)*sin(t) + m0 8 | fy(a, b, phi, m1, t) = -a*sin(phi)*cos(t)+b*cos(phi)*sin(t) + m1 9 | plot "aaa:Lithuanian" using ($2)*1:($3)*1 title "Lithuanian" , \ 10 | "aaa:Georgian" using ($2)*1:($3)*1 title "Georgian" , \ 11 | "aaa:Sardinian" using ($2)*1:($3)*1 title "Sardinian" , \ 12 | "aaa:Sicily_EBA" using ($2)*1:($3)*1 title "Sicily_EBA" lt rgb "black" , \ 13 | m0 = -0.0941, m1 = -0.0197, a = 0.029491478, b = 0.018871080, phi = -1.534647054, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I3122", \ 14 | m0 = -0.0613, m1 = -0.0196, a = 0.030177005, b = 0.018734409, phi = -1.439596300, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I11442", \ 15 | m0 = -0.0469, m1 = -0.0459, a = 0.041026961, b = 0.027237240, phi = -1.568280598, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I7796", \ 16 | m0 = -0.0668, m1 = -0.0320, a = 0.065388904, b = 0.043846220, phi = 1.567529274, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I7800", \ 17 | m0 = -0.0731, m1 = -0.0330, a = 0.040638127, b = 0.026631378, phi = -1.523582242, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I7807" 18 | ## pause 9999 19 | -------------------------------------------------------------------------------- /POPGEN/elldemo/q0: -------------------------------------------------------------------------------- 1 | 0.950 :: sicaa.evec 2 | sample: I3122 3 | -0.094106295 -0.019737790 4 | -0.094106436 -0.019739079 5 | 6 | 0.000059549 0.000003096 7 | 0.000003096 0.000145052 8 | ellcoords: -0.0941 -0.0197 0.029491478 0.018871080 -1.534647054 :: 0.950 9 | sample: I11442 10 | -0.061346222 -0.019552895 11 | -0.061343762 -0.019555191 12 | 13 | 0.000060178 0.000012115 14 | 0.000012115 0.000150393 15 | ellcoords: -0.0613 -0.0196 0.030177005 0.018734409 -1.439596300 :: 0.950 16 | sample: I7796 17 | -0.046873995 -0.045907875 18 | -0.046872172 -0.045907252 19 | 20 | 0.000123822 0.000000395 21 | 0.000000395 0.000280934 22 | ellcoords: -0.0469 -0.0459 0.041026961 0.027237240 -1.568280598 :: 0.950 23 | sample: I7800 24 | -0.066846646 -0.031999605 25 | -0.066842606 -0.031999713 26 | 27 | 0.000320876 -0.000001283 28 | -0.000001283 0.000713629 29 | ellcoords: -0.0668 -0.0320 0.065388904 0.043846220 1.567529274 :: 0.950 30 | sample: I7807 31 | -0.073119168 -0.032956599 32 | -0.073121562 -0.032957419 33 | 34 | 0.000118724 0.000007414 35 | 0.000007414 0.000275285 36 | ellcoords: -0.0731 -0.0330 0.040638127 0.026631378 -1.523582242 :: 0.950 37 | -------------------------------------------------------------------------------- /POPGEN/elldemo/aaa:Georgian: -------------------------------------------------------------------------------- 1 | GEO-002 0.124960 -0.019309 Georgian 2 | GEO-005 0.098987 -0.025584 Georgian 3 | GEO-010 0.124701 -0.043397 Georgian 4 | GEO-015 0.131265 -0.027730 Georgian 5 | GEO-020 0.135953 -0.053088 Georgian 6 | GEO-028 0.134414 -0.036569 Georgian 7 | GEO-031 0.157799 -0.025231 Georgian 8 | GEO-032 0.146574 -0.026196 Georgian 9 | GEO-039 0.158120 -0.022020 Georgian 10 | GEO-051 0.144490 -0.042876 Georgian 11 | GEO-061 0.153842 -0.030906 Georgian 12 | GEO-082 0.158142 -0.028626 Georgian 13 | GEO001 0.156173 -0.043278 Georgian 14 | mg43 0.159324 -0.022091 Georgian 15 | mg47 0.140898 -0.038999 Georgian 16 | mg22 0.138969 -0.035963 Georgian 17 | mg49 0.153820 -0.036044 Georgian 18 | mg23 0.140328 -0.032225 Georgian 19 | mg62 0.158555 -0.026727 Georgian 20 | mg27 0.154282 -0.028294 Georgian 21 | mg31 0.141187 -0.024043 Georgian 22 | mg34 0.149388 -0.030698 Georgian 23 | mg40 0.162562 -0.032568 Georgian 24 | -------------------------------------------------------------------------------- /src/nicksrc/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009-2016, Broad Institute, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 12 | * 13 | Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 18 | * 19 | Neither the name Broad Institute, Inc. nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE 33 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006-2016, Broad Institute, Inc. and Harvard Medical School 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 12 | * 13 | Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 18 | * 19 | Neither the name Broad Institute, Inc. Harvard University, nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE 33 | -------------------------------------------------------------------------------- /src/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006-2016, Broad Institute, Inc. and Harvard Medical School 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 12 | * 13 | Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 18 | * 19 | Neither the name Broad Institute, Inc. Harvard University, nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE 33 | -------------------------------------------------------------------------------- /bin/evec2pca.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ### translate .evec file to .pca file expected by eigenstrat program 4 | ### Note: .evec file does not contain entries for outliers 5 | ### .pca file does contain entries (set to all 0.0) for outliers 6 | 7 | $k = $ARGV[0]; 8 | $evec = $ARGV[1]; 9 | $ind = $ARGV[2]; 10 | $pca = $ARGV[3]; 11 | 12 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); 13 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); 14 | open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing"); 15 | 16 | print PCA ("$k\n"); # number of output eigenvectors/eigenvalues 17 | $line = ; chomp($line); # eigvals line 18 | my @array = split(/[\t ]+/,$line); 19 | for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval 20 | while($line = ) 21 | { 22 | chomp($line); 23 | $line = " " . $line; 24 | my @array = split(/[\t ]+/,$line); 25 | $l = @array; 26 | unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); } 27 | $sample = $array[1]; 28 | for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; } 29 | $found{$sample} = 1; 30 | } 31 | 32 | while($line = ) 33 | { 34 | chomp($line); 35 | my @array = split(/[\t ]+/,$line); 36 | $sample = $array[0]; 37 | if($sample eq "") { $sample = $array[1]; } 38 | unless($found{$sample}) 39 | { 40 | for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; } 41 | } 42 | for($x=0; $x<$k; $x++) 43 | { 44 | printf PCA (" "); 45 | if($evecarray{$sample}[$x] > 0) { printf PCA (" "); } 46 | printf PCA ("%.04f",$evecarray{$sample}[$x]); 47 | } 48 | printf PCA ("\n"); 49 | } 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /CONVERTF/example.ancestrymapgeno: -------------------------------------------------------------------------------- 1 | rs0000 SAMPLE0 1 2 | rs0000 SAMPLE1 1 3 | rs0000 SAMPLE2 1 4 | rs0000 SAMPLE3 0 5 | rs0000 SAMPLE4 0 6 | rs1111 SAMPLE0 0 7 | rs1111 SAMPLE1 1 8 | rs1111 SAMPLE2 2 9 | rs1111 SAMPLE3 1 10 | rs1111 SAMPLE4 2 11 | rs2222 SAMPLE0 2 12 | rs2222 SAMPLE1 1 13 | rs2222 SAMPLE2 1 14 | rs2222 SAMPLE3 0 15 | rs2222 SAMPLE4 1 16 | rs3333 SAMPLE0 0 17 | rs3333 SAMPLE1 0 18 | rs3333 SAMPLE2 1 19 | rs3333 SAMPLE3 2 20 | rs3333 SAMPLE4 2 21 | rs4444 SAMPLE0 2 22 | rs4444 SAMPLE1 1 23 | rs4444 SAMPLE2 1 24 | rs4444 SAMPLE3 0 25 | rs4444 SAMPLE4 0 26 | rs5555 SAMPLE0 0 27 | rs5555 SAMPLE1 0 28 | rs5555 SAMPLE2 1 29 | rs5555 SAMPLE3 1 30 | rs5555 SAMPLE4 1 31 | rs6666 SAMPLE0 2 32 | rs6666 SAMPLE1 2 33 | rs6666 SAMPLE2 1 34 | rs6666 SAMPLE3 1 35 | rs6666 SAMPLE4 0 36 | -------------------------------------------------------------------------------- /POPGEN/elldemo/aaa:Sardinian: -------------------------------------------------------------------------------- 1 | HGDP00666 -0.116760 -0.030823 Sardinian 2 | HGDP00667 -0.116709 -0.017192 Sardinian 3 | HGDP00668 -0.137047 -0.027317 Sardinian 4 | HGDP00669 -0.126519 -0.024759 Sardinian 5 | HGDP00670 -0.135795 -0.031889 Sardinian 6 | HGDP00671 -0.128935 -0.029534 Sardinian 7 | HGDP00672 -0.124405 -0.032339 Sardinian 8 | HGDP00673 -0.120324 -0.035232 Sardinian 9 | HGDP00674 -0.137120 -0.026784 Sardinian 10 | HGDP01062 -0.119201 -0.035362 Sardinian 11 | HGDP01063 -0.141085 -0.036116 Sardinian 12 | HGDP01064 -0.153843 -0.049779 Sardinian 13 | HGDP01065 -0.145904 -0.024355 Sardinian 14 | HGDP01066 -0.157103 -0.027543 Sardinian 15 | HGDP01067 -0.144155 -0.050578 Sardinian 16 | HGDP01068 -0.130147 -0.039257 Sardinian 17 | HGDP01069 -0.140198 -0.030983 Sardinian 18 | HGDP01070 -0.138745 -0.032343 Sardinian 19 | HGDP01071 -0.116760 -0.041106 Sardinian 20 | HGDP01072 -0.127844 -0.040682 Sardinian 21 | HGDP01073 -0.143137 -0.035532 Sardinian 22 | HGDP01074 -0.138522 -0.032142 Sardinian 23 | HGDP01075 -0.145153 -0.023029 Sardinian 24 | HGDP01076 -0.136974 -0.040912 Sardinian 25 | HGDP01077 -0.119744 -0.032696 Sardinian 26 | HGDP01078 -0.124987 -0.031961 Sardinian 27 | HGDP01079 -0.128279 -0.021665 Sardinian 28 | -------------------------------------------------------------------------------- /include/linsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void bal(double *a, double *b, int n) ; 7 | 8 | /* linear algebra */ 9 | void mulmat(double *a, double *b, double *c, int a1, int a2, int a3) ; 10 | void xmultx(double *a, double *b, int m, int n) ; 11 | void txmulx(double *a, double *b, int m, int n) ; 12 | 13 | int solvit (double *prod, double *rhs,int n, double *ans); 14 | int solvitfix (double *prod, double *rhs, int n, double *ans, int *vfix, double *vvals, int nfix) ; 15 | int oldsolvitfix (double *prod, double *rhs, int n, double *ans, int *vfix, double *vvals, int nfix) ; 16 | double pdinv(double *cinv, double *coeff, int n) ; 17 | 18 | /* numer recipes p 97 */ 19 | double logdet(double *mat, int n) ; 20 | int choldc (double *a, int n, double p[]); 21 | void cholsl (double *a, int n, double p[], double b[], double x[]); 22 | int isposdef (double *a, int n) ; 23 | void cholesky(double *cf, double *a, int n) ; 24 | void pmat(double *mat, int n) ; 25 | void imulmat(int *a, int *b, int *c, int a1, int a2, int a3) ; 26 | int linsolv(int n, double* pfMatr, double* pfVect, double* sol) ; // Developer: Henry Guennadi Levkin 27 | 28 | double qval(double *vv, double *q, double *l, int n) ; 29 | void qgrad(double *grad, double *vv, double *q, double *l, int n) ; 30 | double mquad(double y0, double y1, double y2, double *pmx) ; 31 | double qminpos(double *vv, double *q, double *l, int n) ; 32 | double qminposfix(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix) ; 33 | double qminposfixc(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix, int *constraint) ; 34 | double qmin(double *vv, double *q, double *l, int n) ; 35 | double qminfix(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix) ;; 36 | double qmpc (double *vnew, double *vold, double *q, double *l, int *dead, int level, int *constraint, int n) ; 37 | double qmp (double *vnew, double *vold, double *q, double *l, int *dead, int level, int n) ; 38 | -------------------------------------------------------------------------------- /POPGEN/example.log: -------------------------------------------------------------------------------- 1 | parameter file: par.example 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | genotypename: ../CONVERTF/example.ped 5 | snpname: ../CONVERTF/example.map 6 | indivname: ../CONVERTF/example.ped 7 | evecoutname: example.evec 8 | evaloutname: example.eval 9 | altnormstyle: NO 10 | numoutevec: 2 11 | familynames: NO 12 | grmoutname: grmjunk 13 | ## smartpca version: 13050 14 | norm used 15 | 16 | genotype file processed 17 | number of samples used: 5 number of snps used: 7 18 | Using 2 threads, and partial sum lookup algorithm. 19 | total number of snps killed in pass: 0 used: 7 20 | grm dumped 21 | 22 | ## To get Tracy-Widom statistics: recompile smartpca with TWTAB correctly specified in Makefile, or 23 | just run twstats (see README file in POPGEN directory) 24 | kurtosis snps indivs 25 | eigenvector 1 1.271 1.505 26 | eigenvector 2 1.796 2.127 27 | population: 0 Case 2 28 | population: 1 Control 3 29 | 30 | ## Average divergence between populations: 31 | Case Control popsize 32 | Case 0.849 3.298 2 33 | Control 3.298 1.151 3 34 | 35 | 36 | number of blocks for moving block jackknife: 1 37 | fst *1000: 38 | C C 39 | C 0 0 40 | C 0 0 41 | 42 | s.dev * 1000000: 43 | C C 44 | C 0 0 45 | C 0 0 46 | 47 | eigenvector 1:means 48 | Control -0.336 49 | Case 0.505 50 | ## Anova statistics for population differences along each eigenvector: 51 | p-value 52 | eigenvector_1_Case_Control_ 0.0262738 53 | eigenvector 2:means 54 | Control -0.056 55 | Case 0.084 56 | eigenvector_2_Case_Control_ 0.805663 57 | 58 | ## Statistical significance of differences beween populations: 59 | pop1 pop2 chisq p-value |pop1| |pop2| 60 | popdifference: Case Control 9.554 0.00842201 2 3 61 | 62 | ##end of smartpca run 63 | -------------------------------------------------------------------------------- /bin/gc.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $P = $ARGV[0]; 4 | $out = $ARGV[1]; 5 | 6 | # get data 7 | $m=0; 8 | open(P,"$P") || die("COF"); 9 | while($line =

) { if($line =~ /Chisq/) { last; } } # header lines 10 | while($line =

) 11 | { 12 | chomp($line); 13 | if($line =~ /NA/) 14 | { 15 | $chisq1[$m] = -100; 16 | $chisq2[$m] = -100; 17 | $m++; 18 | next; 19 | } 20 | my @array = split(/[\t ]+/,$line); 21 | $chisq1[$m] = $array[0]; # Chisq 22 | $chisq2[$m] = $array[1]; # EIGENSTRAT 23 | $m++; 24 | $mvalid++; 25 | } 26 | close(P); 27 | $nSNP = $m; 28 | $nSNPvalid = $mvalid; 29 | 30 | # compute $lambda1 (Chisq) 31 | $CHISQTHRESH = 0.456; 32 | $step = 0.25; 33 | $oktoreducestep = 0; 34 | for($iter=0; $iter<20; $iter++) 35 | { 36 | $mm = 0; 37 | for($m=0; $m<$nSNP; $m++) 38 | { 39 | if($chisq1[$m] > $CHISQTHRESH) { $mm++; } 40 | } 41 | $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH 42 | if($frac > 0.5) { $CHISQTHRESH += $step; } 43 | else { $CHISQTHRESH -= $step; $oktoreducestep = 1; } 44 | if($oktoreducestep) { $step *= 0.5; } 45 | } 46 | $lambda1 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation 47 | if($lambda1 < 1) { $lambda1 = 1; } # not allowed to be less than 1 48 | 49 | # compute $lambda2 (EIGENSTRAT) 50 | $CHISQTHRESH = 0.456; 51 | $step = 0.25; 52 | $oktoreducestep = 1; 53 | for($iter=0; $iter<20; $iter++) 54 | { 55 | $mm = 0; 56 | for($m=0; $m<$nSNP; $m++) 57 | { 58 | if($chisq2[$m] > $CHISQTHRESH) { $mm++; } 59 | } 60 | $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH 61 | if($frac > 0.5) { $CHISQTHRESH += $step; } 62 | else { $CHISQTHRESH -= $step; $oktoreducestep = 1; } 63 | if($oktoreducestep) { $step *= 0.5; } 64 | } 65 | $lambda2 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation 66 | if($lambda2 < 1) { $lambda2 = 1; } # not allowed to be less than 1 67 | 68 | # output 69 | open(OUT,">$out") || die("COF"); 70 | print OUT ("Chisq EIGENSTRAT\n"); 71 | printf OUT ("lambda=%.03f lambda=%.03f\n",$lambda1,$lambda2); 72 | for($m=0; $m<$nSNP; $m++) 73 | { 74 | if($chisq1[$m] < 0) { print OUT ("NA NA\n"); next; } 75 | printf OUT ("%.04f %.04f\n",$chisq1[$m]/$lambda1,$chisq2[$m]/$lambda2); 76 | } 77 | -------------------------------------------------------------------------------- /POPGEN/twexample.eval: -------------------------------------------------------------------------------- 1 | 3.896803 2 | 1.823842 3 | 1.233871 4 | 1.227676 5 | 1.212209 6 | 1.200049 7 | 1.198344 8 | 1.188785 9 | 1.188666 10 | 1.185873 11 | 1.178875 12 | 1.172266 13 | 1.168976 14 | 1.168512 15 | 1.164908 16 | 1.157360 17 | 1.149275 18 | 1.146300 19 | 1.142036 20 | 1.137177 21 | 1.133412 22 | 1.128474 23 | 1.123303 24 | 1.121108 25 | 1.117918 26 | 1.112247 27 | 1.111754 28 | 1.108400 29 | 1.101575 30 | 1.098672 31 | 1.095195 32 | 1.092344 33 | 1.087750 34 | 1.085886 35 | 1.083626 36 | 1.081931 37 | 1.079625 38 | 1.078805 39 | 1.071742 40 | 1.068232 41 | 1.067591 42 | 1.063070 43 | 1.060513 44 | 1.058145 45 | 1.056450 46 | 1.054916 47 | 1.050893 48 | 1.048894 49 | 1.043173 50 | 1.042661 51 | 1.040887 52 | 1.036872 53 | 1.034616 54 | 1.030792 55 | 1.028936 56 | 1.027220 57 | 1.025316 58 | 1.022074 59 | 1.016391 60 | 1.015529 61 | 1.013203 62 | 1.008569 63 | 1.007275 64 | 1.003458 65 | 1.001885 66 | 1.001046 67 | 0.996586 68 | 0.995739 69 | 0.992099 70 | 0.988173 71 | 0.985532 72 | 0.982139 73 | 0.979806 74 | 0.976317 75 | 0.974724 76 | 0.971432 77 | 0.970185 78 | 0.966816 79 | 0.965728 80 | 0.962983 81 | 0.958791 82 | 0.956462 83 | 0.954499 84 | 0.952054 85 | 0.949986 86 | 0.945780 87 | 0.944417 88 | 0.939229 89 | 0.937765 90 | 0.933657 91 | 0.931285 92 | 0.928772 93 | 0.927551 94 | 0.922779 95 | 0.919530 96 | 0.917899 97 | 0.914707 98 | 0.909525 99 | 0.907726 100 | 0.902872 101 | 0.900004 102 | 0.899288 103 | 0.898200 104 | 0.896129 105 | 0.892450 106 | 0.889483 107 | 0.886985 108 | 0.880826 109 | 0.880009 110 | 0.877975 111 | 0.875729 112 | 0.870950 113 | 0.867717 114 | 0.866428 115 | 0.864954 116 | 0.861732 117 | 0.859268 118 | 0.858495 119 | 0.854881 120 | 0.851066 121 | 0.846823 122 | 0.842529 123 | 0.841246 124 | 0.838740 125 | 0.838459 126 | 0.834131 127 | 0.832612 128 | 0.827219 129 | 0.823981 130 | 0.821442 131 | 0.818558 132 | 0.815683 133 | 0.814061 134 | 0.810291 135 | 0.808223 136 | 0.806080 137 | 0.801127 138 | 0.795730 139 | 0.793393 140 | 0.790344 141 | 0.787822 142 | 0.784379 143 | 0.777760 144 | 0.776245 145 | 0.774965 146 | 0.764580 147 | 0.758267 148 | 0.747945 149 | 0.747072 150 | -------------------------------------------------------------------------------- /src/eigensrc/smartsubs.c: -------------------------------------------------------------------------------- 1 | #include "qpsubs.h" 2 | #include "eigsubs.h" 3 | #include "smartsubs.h" 4 | extern int fancynorm, verbose, plotmode, outnum; 5 | 6 | // static Indiv **indm ; 7 | // static void wjackestx(double *est, double *sig, double mean, double *jmean, double *jwt, int g) ; 8 | // static void wjackvestx(double *vest, double *var, int d, double *mean, double **jmean, double *jwt, int g) ; 9 | static int outliermode = 0; 10 | 11 | void 12 | setoutliermode (int mode) 13 | { 14 | outliermode = mode; 15 | } 16 | 17 | int 18 | ridoutlier (double *evecs, int n, int neigs, 19 | double thresh, int *badlist, OUTLINFO ** outinfo) 20 | { 21 | /* badlist contains list of outliers */ 22 | double *ww, *w2, y1, y2, yy, zz; 23 | int *vbad; 24 | int i, j; 25 | int nbad = 0; 26 | OUTLINFO *outpt; 27 | 28 | if (outliermode > 1) 29 | return 0; 30 | if (n < 3) 31 | return 0; 32 | ZALLOC (ww, n, double); 33 | ZALLOC (vbad, n, int); 34 | for (j = 0; j < n; j++) { 35 | outpt = outinfo[j]; 36 | outpt->vecno = -1; 37 | } 38 | for (i = 0; i < neigs; ++i) { 39 | copyarr (evecs + i * n, ww, n); 40 | if (outliermode == 0) { 41 | y1 = asum (ww, n) / (double) n; 42 | vsp (ww, ww, -y1, n); 43 | y2 = asum2 (ww, n) / (double) n; 44 | y2 = sqrt (y2); 45 | vst (ww, ww, 1.0 / y2, n); 46 | 47 | for (j = 0; j < n; j++) { 48 | if (fabs (ww[j]) > thresh) { 49 | vbad[j] = 1; 50 | outpt = outinfo[j]; 51 | if (outpt->vecno < 0) { 52 | outpt->vecno = i; 53 | outpt->score = ww[j]; 54 | } 55 | } 56 | } 57 | } 58 | if (outliermode == 1) { 59 | ZALLOC (w2, n, double); 60 | for (j = 0; j < n; j++) { 61 | yy = ww[j]; 62 | ww[j] = 0; 63 | y1 = asum (ww, n) / (double) (n - 1); 64 | vsp (w2, ww, -y1, n); 65 | w2[j] = 0; 66 | y2 = asum2 (w2, n) / (double) n; 67 | y2 = sqrt (y2); 68 | zz = yy - y1; 69 | zz /= y2; 70 | if (fabs (zz) > thresh) { 71 | vbad[j] = 1; 72 | outpt = outinfo[j]; 73 | if (outpt->vecno < 0) { 74 | outpt->vecno = i; 75 | outpt->score = zz; 76 | } 77 | } 78 | ww[j] = yy; 79 | } 80 | free (w2); 81 | } 82 | } 83 | for (j = 0; j < n; j++) { 84 | if (vbad[j] == 1) { 85 | badlist[nbad] = j; 86 | ++nbad; 87 | } 88 | } 89 | free (ww); 90 | free (vbad); 91 | return nbad; 92 | 93 | } 94 | -------------------------------------------------------------------------------- /bin/rescale_ell: -------------------------------------------------------------------------------- 1 | #!/usr//bin/perl -w 2 | 3 | ## rescale -i infile -o outfile -a inscale -b outscale 4 | 5 | use Getopt::Std ; 6 | use File::Basename ; 7 | 8 | ## pops : separated -x = make postscript and pdf -z use another separator 9 | ## -k keep intermediate files 10 | ## NEW if pops is a file names are read one per line 11 | ## scaling on x, y axes 12 | 13 | getopts('i:o:a:s:',\%opts) ; 14 | $infile = $opts{"i"} ; 15 | $outfile = $opts{"o"} ; 16 | $inscalex = $opts{"a"} ; 17 | $outscale = $opts{"s"} ; 18 | 19 | $inscale = 0.95 ; 20 | 21 | $inscale = $inscalex if (defined $inscalex) ; 22 | 23 | usage() unless (defined $infile) ; 24 | usage() unless (-r $infile) ; 25 | usage() unless (defined $outfile) ; 26 | usage() unless (defined $outscale) ; 27 | 28 | open (I1, "$infile") ; 29 | open (YY, ">$outfile") || die "can't open $outfile\n" ; 30 | 31 | $lnum = $junk = -99 ; 32 | foreach $line () { 33 | chomp $line ; 34 | ($a, $b, , $c, @Z) = split " ", $line ; 35 | next unless (defined $b) ; 36 | 37 | if ($a =~ /sample:/) { 38 | $lnum = 0 ; 39 | $L[0] = $line ; 40 | next ; 41 | } 42 | if ($lnum<0) { 43 | $inscale = $a ; 44 | printf YY "%9.3f :: %s\n", $outscale, $c ; 45 | next ; 46 | } 47 | ++$lnum ; 48 | $L[$lnum] = $line ; 49 | if ($a =~ /ellcoords:/) { 50 | $inscale = pop @Z unless (defined $inscalex) ; 51 | $fac2 = critchi($outscale)/critchi($inscale) ; 52 | $fac = sqrt($fac2) ; 53 | 54 | print YY "$L[0]\n" ; 55 | print YY "$L[1]\n" ; 56 | print YY "$L[2]\n" ; 57 | ($a, $b) = split " ", $L[3] ; $a *= $fac2; $b *= $fac2 ; printf YY "%15.9f %15.9f\n", $a, $b ; 58 | ($a, $b) = split " ", $L[4] ; $a *= $fac2; $b *= $fac2 ; printf YY "%15.9f %15.9f\n", $a, $b ; 59 | ($junk, $a, $b, $c, $d, $e) = split " ", $line ; 60 | print YY "$junk $a $b " ; 61 | $c *= $fac ; 62 | $d *= $fac ; 63 | printf YY "%15.9f ", $c ; 64 | printf YY "%15.9f ", $d ; 65 | printf YY "%15.9f ", $e ; 66 | printf YY ":: %9.3f\n", $outscale ; 67 | next ; 68 | } 69 | } 70 | close YY ; 71 | 72 | 73 | sub critchi { 74 | ## critical value for chisq 2 dof 75 | my ($scale) = @_ ; 76 | local $x ; 77 | 78 | $x = -2*log(1-$scale) ; 79 | ## printf "zz %12.6f %12.6f\n", $scale, $x ; 80 | 81 | return $x ; 82 | 83 | } 84 | 85 | sub usage { 86 | 87 | print "rescale_ell -i infile -o outfile -s outscale [-a inscale]\n" ; 88 | exit 0 ; 89 | 90 | } 91 | -------------------------------------------------------------------------------- /bin/smarteigenstrat.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # perl wrapper for smarteigenstrat program. Run smarteigenstrat.perl with no options for usage 4 | 5 | use Getopt::Std; 6 | 7 | my @flaglist = ("i","a","b","o","p","q","l","k"); 8 | $x = @ARGV; 9 | for($n=0;$n<$x;$n++) { 10 | foreach $flag (@flaglist) { 11 | if ($ARGV[$n] eq "-$flag") { 12 | $specified{$flag} = 1; 13 | } 14 | } 15 | } 16 | 17 | # check for mandatory options 18 | foreach $flag ("i","a","b","p","o","l") { 19 | unless ($specified{$flag}) { 20 | usage(); 21 | die("Error: -$flag not specified"); 22 | } 23 | } 24 | 25 | # get opts from hash 26 | getopts('i:a:b:p:o:q:k:l:', \%opts); 27 | $genofilename = $opts{"i"}; 28 | $indfilename = $opts{"b"}; 29 | $snpfilename = $opts{"a"}; 30 | $pcafilename = $opts{"p"}; 31 | $outfilename = $opts{"o"}; 32 | $logfilename = $opts{"l"}; 33 | $qtmode = "NO"; 34 | if ( $specified{"q"} ) { 35 | $qtmode = $opts{"q"}; 36 | } 37 | $k = 1; 38 | if ( $specified{"k"} ) { 39 | $k = $opts{"k"}; 40 | } 41 | 42 | # write parameter file 43 | open(PAR, ">$outfilename.par") || die("Error: unable to open $outfilename.par\n"); 44 | print PAR "genotypename: $genofilename\n"; 45 | print PAR "snpname: $snpfilename\n"; 46 | print PAR "indivname: $indfilename\n"; 47 | print PAR "pcaname: $pcafilename\n"; 48 | print PAR "outputname: $outfilename\n"; 49 | print PAR "numpc: $k\n"; 50 | print PAR "qtmode: $qtmode\n"; 51 | close(PAR); 52 | 53 | # run smarteigenstrat 54 | $cmd = "smarteigenstrat -p $outfilename.par >$logfilename"; 55 | print "$cmd\n"; 56 | system($cmd); 57 | 58 | sub usage { 59 | print "smarteigenstrat.perl -i -a -b -p -o "; 60 | print " -l -k -q "; 61 | print "\n"; 62 | print "-i genotype file (PED, PACKEDPED, EIGENSTRAT, ANCESTRYMAP or PACKEDANCESTRYMAP format)"; 63 | print "-o output file (chisq)\n"; 64 | print "-l logfile (screen output,including error messages)\n"; 65 | print "-q YES for quantitative phenotype or NO otherwise\n"; 66 | print "\n"; 67 | print "For quantitative phenotype, sixth column of .ped file or third column of EIGENSTRAT .ind file\n"; 68 | print "should be real numbers. For non-quantitative phenotype, sixth column of .ped or third column\n"; 69 | print "should be 'Case' or 'Control'\n"; 70 | } 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /POPGEN/elldemo/rescale_ell: -------------------------------------------------------------------------------- 1 | #!/usr//bin/perl -w 2 | 3 | ## rescale -i infile -o outfile -a inscale -b outscale 4 | 5 | use Getopt::Std ; 6 | use File::Basename ; 7 | 8 | ## pops : separated -x = make postscript and pdf -z use another separator 9 | ## -k keep intermediate files 10 | ## NEW if pops is a file names are read one per line 11 | ## scaling on x, y axes 12 | 13 | getopts('i:o:a:s:',\%opts) ; 14 | $infile = $opts{"i"} ; 15 | $outfile = $opts{"o"} ; 16 | $inscalex = $opts{"a"} ; 17 | $outscale = $opts{"s"} ; 18 | 19 | $inscale = 0.95 ; 20 | 21 | $inscale = $inscalex if (defined $inscalex) ; 22 | 23 | usage() unless (defined $infile) ; 24 | usage() unless (-r $infile) ; 25 | usage() unless (defined $outfile) ; 26 | usage() unless (defined $outscale) ; 27 | 28 | open (I1, "$infile") ; 29 | open (YY, ">$outfile") || die "can't open $outfile\n" ; 30 | 31 | $lnum = $junk = -99 ; 32 | foreach $line () { 33 | chomp $line ; 34 | ($a, $b, , $c, @Z) = split " ", $line ; 35 | next unless (defined $b) ; 36 | 37 | if ($a =~ /sample:/) { 38 | $lnum = 0 ; 39 | $L[0] = $line ; 40 | next ; 41 | } 42 | if ($lnum<0) { 43 | $inscale = $a ; 44 | printf YY "%9.3f :: %s\n", $outscale, $c ; 45 | next ; 46 | } 47 | ++$lnum ; 48 | $L[$lnum] = $line ; 49 | if ($a =~ /ellcoords:/) { 50 | $inscale = pop @Z unless (defined $inscalex) ; 51 | $fac2 = critchi($outscale)/critchi($inscale) ; 52 | $fac = sqrt($fac2) ; 53 | 54 | print YY "$L[0]\n" ; 55 | print YY "$L[1]\n" ; 56 | print YY "$L[2]\n" ; 57 | ($a, $b) = split " ", $L[3] ; $a *= $fac2; $b *= $fac2 ; printf YY "%15.9f %15.9f\n", $a, $b ; 58 | ($a, $b) = split " ", $L[4] ; $a *= $fac2; $b *= $fac2 ; printf YY "%15.9f %15.9f\n", $a, $b ; 59 | ($junk, $a, $b, $c, $d, $e) = split " ", $line ; 60 | print YY "$junk $a $b " ; 61 | $c *= $fac ; 62 | $d *= $fac ; 63 | printf YY "%15.9f ", $c ; 64 | printf YY "%15.9f ", $d ; 65 | printf YY "%15.9f ", $e ; 66 | printf YY ":: %9.3f\n", $outscale ; 67 | next ; 68 | } 69 | } 70 | close YY ; 71 | 72 | 73 | sub critchi { 74 | ## critical value for chisq 2 dof 75 | my ($scale) = @_ ; 76 | local $x ; 77 | 78 | $x = -2*log(1-$scale) ; 79 | ## printf "zz %12.6f %12.6f\n", $scale, $x ; 80 | 81 | return $x ; 82 | 83 | } 84 | 85 | sub usage { 86 | 87 | print "rescale_ell -i infile -o outfile -s outscale [-a inscale]\n" ; 88 | exit 0 ; 89 | 90 | } 91 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | override CFLAGS += -I../include -I/usr/include/openblas 2 | #LDLIBS += -lgsl -lopenblas -lrt -lm 3 | override LDLIBS += -lgsl -lopenblas -lm -lpthread 4 | # Some Linux distributions require separate lapacke library 5 | # override LDLIBS += -llapacke 6 | # Mac additions using homebrew installations 7 | #override CFLAGS += -I/usr/local/opt/openblas/include -I/usr/local/opt/gsl/include 8 | #override LDFLAGS += -L/usr/local/opt/openblas/lib -L/usr/local/opt/gsl/lib 9 | # Harvard Medical School O2 cluster additions 10 | ifdef SLURM_CONF 11 | override CFLAGS += -I/n/app/openblas/0.2.19/include -I/n/app/gsl/2.3/include 12 | override LDFLAGS += -L/n/app/openblas/0.2.19/lib -L/n/app/gsl/2.3/lib/ 13 | endif 14 | 15 | ifeq ($(OPTIMIZE), 1) 16 | CFLAGS += -O2 17 | endif 18 | 19 | ifeq ($(DEBUG), 1) 20 | CFLAGS += -g # enable debugging 21 | endif 22 | 23 | ifeq ($(PROFILING), 1) 24 | CFLAGS += -pg # enable profiling 25 | endif 26 | 27 | ND=nicksrc 28 | ED=eigensrc 29 | KD=ksrc 30 | 31 | NLIB = $(ND)/libnick.a 32 | 33 | # ----- phony targets 34 | .PHONY: all clean clobber install 35 | 36 | EXE = baseprog convertf mergeit pca \ 37 | $(ED)/pcatoy $(ED)/smartrel $(ED)/smarteigenstrat \ 38 | $(ED)/twstats $(ED)/eigenstrat $(ED)/eigenstratQTL $(ED)/smartpca 39 | 40 | all: $(EXE) 41 | 42 | install: all 43 | mv $(EXE) ../bin 44 | 45 | clobber: 46 | rm -f *.o */*.o */*.a $(ND)/*.o $(ED)/*.o $(KD)/*.o 47 | rm -f $(EXE) 48 | cd ../bin/ ; rm -f $(notdir $(EXE)) ; cd ../src 49 | 50 | clean: 51 | rm -f *.o core core.* *.o $(ND)/*.o $(ED)/*.o $(KD)/*.o $(EXE) 52 | 53 | # ----- build nicksrc/libnick.a 54 | $(NLIB): 55 | $(MAKE) -C $(ND) 56 | 57 | baseprog: baseprog.o mcio.o egsubs.o admutils.o h2d.o $(ED)/exclude.o $(NLIB) 58 | 59 | convertf: convertf.o mcio.o egsubs.o admutils.o h2d.o $(ED)/exclude.o $(NLIB) 60 | 61 | mergeit: mergeit.o mcio.o admutils.o $(NLIB) 62 | 63 | pca: pca.o $(ED)/eigsubs.o eigensrc/eigx.o $(NLIB) 64 | 65 | $(ED)/pcatoy: $(ED)/pcatoy.o eigensrc/eigsubs.o eigensrc/eigx.o $(NLIB) 66 | 67 | $(ED)/smartrel: $(ED)/smartrel.o twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o \ 68 | $(ED)/eigsubs.o $(ED)/eigx.o $(ED)/smartsubs.o $(NLIB) 69 | 70 | $(ED)/smarteigenstrat: $(ED)/smarteigenstrat.o mcio.o admutils.o $(NLIB) 71 | 72 | $(ED)/twstats: $(ED)/twstats.o $(NLIB) 73 | 74 | $(ED)/smartpca: $(ED)/smartpca.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \ 75 | mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \ 76 | $(NLIB) \ 77 | $(KD)/kjg_fpca.o $(KD)/kjg_gsl.o 78 | #-lpthread 79 | -------------------------------------------------------------------------------- /src/eigensrc/exclude.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define MAXRGN 1000 6 | 7 | void 8 | excluderegions (char *xregionname, SNP ** snps, int nsnps, 9 | char *deletesnpoutname) 10 | { 11 | FILE *fp; 12 | 13 | int chr[MAXRGN]; 14 | int lo[MAXRGN]; 15 | int hi[MAXRGN]; 16 | 17 | char line[MAXSTR]; 18 | char *spt[MAXFF]; 19 | int nsplit, nrgn, i, j; 20 | 21 | if ((fp = fopen (xregionname, "r")) == NULL) { 22 | printf ("excluderegions: can't open file %s\n", xregionname); 23 | return; 24 | } 25 | 26 | for (i = 0; i < MAXRGN; i++) { 27 | 28 | if (fgets (line, MAXSTR, fp) == NULL) 29 | break; 30 | 31 | nsplit = splitup (line, spt, MAXFF); 32 | if (nsplit != 3) 33 | continue; 34 | 35 | chr[i] = atoi (spt[0]); 36 | lo[i] = atoi (spt[1]); 37 | hi[i] = atoi (spt[2]); 38 | 39 | } 40 | fclose (fp); 41 | nrgn = i; 42 | 43 | for (i = 0; i < nsnps; i++) { 44 | SNP *cupt = snps[i]; 45 | for (j = 0; j < nrgn; j++) { 46 | if (cupt->chrom == chr[j] && cupt->physpos >= lo[j] 47 | && cupt->physpos <= hi[j]) { 48 | cupt->ignore = YES; 49 | if (deletesnpoutname != NULL) { 50 | logdeletedsnp (cupt->ID, "xregion", deletesnpoutname); 51 | } 52 | } 53 | } 54 | } 55 | 56 | return; 57 | 58 | } 59 | 60 | void 61 | hwfilter (SNP ** snps, int nsnps, int nindiv, double nhwfilter, 62 | char *deletesnpoutname) 63 | { 64 | 65 | int i, k; 66 | 67 | for (i = 0; i < nsnps; i++) { 68 | int num = 0, den = 0, het = 0, n0 = 0, n1 = 0, n2 = 0, nsamples; 69 | double p, Q, stdv; 70 | SNP *cupt = snps[i]; 71 | 72 | for (k = 0; k < nindiv; k++) { 73 | int g = getgtypes (cupt, k); 74 | if (g >= 0) { 75 | num += g; 76 | den += 2; 77 | } 78 | if (g == 1) { 79 | het++; 80 | n1++; 81 | } 82 | else if (g == 0) { 83 | n0++; 84 | } 85 | else if (g == 2) { 86 | n2++; 87 | } 88 | } 89 | 90 | if ((nsamples = den / 2) == 0) 91 | continue; 92 | p = (double) num / den; 93 | Q = 2 * p * (1 - p); 94 | stdv = sqrt (Q * (1 - Q) / nsamples); 95 | if (fabs ((double) het / nsamples - Q) > nhwfilter * stdv) { 96 | printf ("SNP %s removed by Hardy-Weinberg filter\n", cupt->ID); 97 | cupt->ignore = YES; 98 | if (deletesnpoutname != NULL) { 99 | logdeletedsnp (cupt->ID, "hwfilt", deletesnpoutname); 100 | } 101 | } 102 | } 103 | 104 | 105 | } 106 | -------------------------------------------------------------------------------- /bin/evec2pca-ped.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | ### translate .evec file to .pca file expected by eigenstrat program 4 | ### Note: .evec file does not contain entries for outliers 5 | ### .pca file does contain entries (set to all 0.0) for outliers 6 | 7 | # ----- This is a new version for PLINK input files. It differs from the 8 | # ----- original in two ways. (1) The indiv name is in the second column 9 | # ----- of the .fam file, but the first of a .ind file. (2) If the 10 | # ----- indiv names are not found in the .evec file, try the 11 | # ----- familyname:indivname combination. 12 | 13 | $k = $ARGV[0]; 14 | $evec = $ARGV[1]; 15 | $ind = $ARGV[2]; 16 | $pca = $ARGV[3]; 17 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); 18 | open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing"); 19 | 20 | print PCA ("$k\n"); # number of output eigenvectors/eigenvalues 21 | $line = ; chomp($line); # eigvals line 22 | my @array = split(/[\t ]+/,$line); 23 | for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval 24 | while($line = ) 25 | { 26 | chomp($line); 27 | $line = " " . $line; 28 | my @array = split(/[\t ]+/,$line); 29 | $l = @array; 30 | unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); } 31 | $sample = $array[1]; 32 | for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; } 33 | $found{$sample} = 1; 34 | } 35 | 36 | # ----- Figure out which name convention to use 37 | my $count1 = 0; 38 | my $count2 = 0; 39 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); 40 | while ( my $line = ) { 41 | chomp($line); 42 | $line =~ s/^[\s]+//; # remove leading white-space 43 | my @E = split(/[\s]+/,$line); 44 | 45 | my $s = $E[1]; 46 | my $t = $E[0] . ":" . $E[1]; 47 | 48 | if ( exists $found{$s} ) { 49 | $count1++; 50 | } 51 | if ( exists $found{$t} ) { 52 | $count2++; 53 | } 54 | } 55 | close(IND); 56 | 57 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading"); 58 | while($line = ) 59 | { 60 | chomp($line); 61 | $line =~ s/^[\s]+//; 62 | my @array = split(/[\s]+/,$line); 63 | $sample = ($count1 >= $count2 ? $array[1] : $array[0] . ":" . $array[1]); 64 | if($sample eq "") { $sample = $array[1]; } 65 | unless($found{$sample}) 66 | { 67 | for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; } 68 | } 69 | for($x=0; $x<$k; $x++) 70 | { 71 | printf PCA (" "); 72 | if($evecarray{$sample}[$x] > 0) { printf PCA (" "); } 73 | printf PCA ("%.04f",$evecarray{$sample}[$x]); 74 | } 75 | printf PCA ("\n"); 76 | } 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /include/ranmath.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | #include 6 | 7 | #define BIGINT INT_MAX 8 | #define SRAND srandom 9 | #define LRAND random 10 | #define DRAND() ( (double) (random() % BIGINT) / (double) (BIGINT)) 11 | #define DRAND2() ( drand2() ) 12 | /* random must return random integer in range 0 to BIGINT-1 */ 13 | 14 | 15 | #define NORMAL gauss 16 | 17 | double gauss() ; // standard normal 18 | void gaussa(double *a, int n) ; // array of standard normals 19 | double gds(double a) ; // obsolete 20 | double poidev(double mean) ; // obsolete 21 | double ranpoiss(double mean) ; // poisson Note double 22 | double ranpoissx(double mean) ; // poisson | > 0 23 | void ranperm(int *a, int n) ; // randomly permute a; if random permulation wanted : idperm(a,n) ; ranperm(a,n) 24 | double ranexp( void) ; // exponential mean 1 25 | double rangam(double a) ; // standard gamma mean a 26 | int randis(double *a, int n) ; // element from discrete distribution a 27 | void ransamp(int *samp, int nsamp, double *p, int plen) ; // sample nsamp samples from p 28 | void pick2(int n, int *k1, int *k2) ; // pick 2 elements from 0..n-1 29 | int ranmod(int n) ; // random mod n 30 | int iranpick(int lo, int hi) ; // random int in [lo, hi] 31 | double ranbeta(double a, double b) ; // beta 32 | int ranbinom(int n, double p) ; // binomial 33 | void setrand(double *vv, int n) ; // filll vv with U[0,1] 34 | int ewens(int *a, int n, double theta) ; // ewens sampling formula 35 | void genmultgauss(double *rvec, int num, int n, double *covar) ; // multivariate 36 | double drand2() ; 37 | void ranmultinom(int *samp, int n, double *p, int len) ; // multinomial 38 | double ranchi (int d) ; // chisq d dof. 39 | void raninvwis(double *wis, int t, int d, double *s) ; // inverse wishart 40 | double uniform(double lo, double hi) ; // uniform (lo..hi) 41 | void ransimplex(double *x, int n) ; // uniform on n-simplex 42 | void randirichlet(double *x, double *pp, int n) ; // dirichlet parameter vector pp 43 | void randirmult(double *pp, int *aa, int len, int m) ; // dirichlet multinomial. Output aa 44 | int prob1(double p) ; // return YES with probability p 45 | double rant(double df) ; // t distribution 46 | double samppow(double e, double a, double b) ; 47 | double rejnorm(double lo, double hi) ; // usually call ranboundnorm 48 | double ranboundnorm(double lo, double hi) ; // sample standard normal in [lo, hi] 49 | double rtrunc2(double T) ; // sample standard normal > T Rayleigh rejection 50 | double rantruncnorm(double T, int upper) ; // sample standard normal > T (upper =1) < T (upper = 0) 51 | int ranhprob(int n, int a, int m) ; // n balls a block sample m . Fow many black 52 | double rangeom (double theta) ; // Geometric distribution, mean 1/theta 53 | -------------------------------------------------------------------------------- /POPGEN/smartpca.info: -------------------------------------------------------------------------------- 1 | This file contains documentation of standard output printed by smartpca. For 2 | general documentation of the smartpca program, see README in this directory. 3 | 4 | Parameter values: smartpca prints basic info on parameter values used. 5 | 6 | Outliers: smartpca prints a list of outlier individuals removed, if any. 7 | Especially in population genetic studies you may not wish for 8 | outliers to be removed. Code 9 | numoutliter: 0 10 | 11 | Tracy-Widom statistics: the column of interest is the "p-value" column which 12 | indicates the statistical significance of each principal component. 13 | To get Tracy-Widom statistics, you must recompile smartpca in your 14 | local src/ directory (and move it to bin/), 15 | or just run twstats (see README file in POPGEN directory). 16 | 17 | eigbestsnp: the SNP of maximum weight. SNP weights are proportional to 18 | the correlation (across samples) between each SNP and each PC. 19 | Equivalently, PC coordinates of a given sample can be computed as the 20 | weighted sum of normalized SNP genotypes. 21 | 22 | ---------------------------------------------------------------------------- 23 | 24 | POPULATION GENETIC STATISTICS (relevant to studying relationships between 25 | populations whose labels are explicitly specified in input indiv file) -- 26 | 27 | Average divergence between populations: smartpca prints a divergence matrix 28 | describing divergence between each pair of populations. Details: 29 | From the covariance matrix X whose eigenvectors were computed 30 | we can compute a "distance" d for each pair of individuals (i,j): 31 | d(i,j) = X(i,i) + X(j,j) - 2X(i,j) 32 | For each pair of populations (a,b) 33 | now define 34 | D(a,b) = \sum d(i,j) (in pop a, in pop b) / (| popa | * | pop b| ) 35 | an average distance. We then normalize D so that the diagonal has 36 | mean 1 and publish D. 37 | 38 | Fst statistics: prints fst estimate between each pair of populations, 39 | along with standard error of the estimate. 40 | [If there is only 1 population, no fst statistics are printed.] 41 | [If phylipoutname parameter is specified, this information is instead 42 | printed to an output file in PHYLIP format. See ./README for details.] 43 | 44 | Anova statistics for population differences along each eigenvector: 45 | For each eigenvector, a P-value for statistical significance of differences 46 | between each pair of populations along that eigenvector is printed. 47 | +++ is used to highlight P-values less than 1e-06. 48 | *** is used to highlight P-values between 1e-06 and 1e-03. 49 | If there are more than 2 populations, an overall P-value is also printed 50 | for that eigenvector. 51 | If there are more than 2 populations, the populations with minimum (minv) 52 | and maximum (maxv) eigenvector coordinate are also printed. 53 | [If there is only 1 population, no Anova statistics are printed.] 54 | 55 | Statistical significance of differences between populations: 56 | For each pair of populations, the above Anova statistics are summed 57 | across eigenvectors. The result is approximately chisq with 58 | d.o.f. equal to the number of eigenvectors. The chisq statistic and 59 | its p-value are printed. 60 | [If there is only 1 population, no statistics are printed.] 61 | 62 | ----------------------------------------------------------------------- 63 | 64 | Questions? nickp@broadinstitute.org 65 | -------------------------------------------------------------------------------- /src/eigensrc/twstats.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | int verbose = NO; 9 | double nval = -1; 10 | 11 | char *iname = NULL; 12 | char *parname = NULL; 13 | char *oname = NULL; 14 | 15 | char *twxtab = NULL; 16 | 17 | void readcommands (int argc, char **argv); 18 | 19 | #define VERSION "1000" 20 | 21 | int minleneig = 10; 22 | 23 | 24 | int 25 | main (int argc, char **argv) 26 | { 27 | FILE *ofile; 28 | int nlambda = 0; 29 | int i, m; 30 | double zn, zvar, tw, tail; 31 | double *xx[0], *lambda; 32 | 33 | readcommands (argc, argv); 34 | settwxtable (twxtab); 35 | 36 | if (oname == NULL) 37 | ofile = stdout; 38 | else 39 | openit (oname, &ofile, "w"); 40 | 41 | if (iname == NULL) 42 | fatalx ("i paraameter compulsory\n"); 43 | nlambda = numlines (iname); 44 | ZALLOC (lambda, nlambda, double); 45 | xx[0] = lambda; 46 | nlambda = getxx (xx, nlambda, 1, iname); 47 | vst (lambda, lambda, -1.0, nlambda); 48 | sortit (lambda, NULL, nlambda); 49 | vst (lambda, lambda, -1.0, nlambda); 50 | m = numgtz (lambda, nlambda); 51 | 52 | fprintf (ofile, "%4s %12s", "#N", "eigenvalue"); 53 | fprintf (ofile, "%12s", "difference"); 54 | fprintf (ofile, " %9s %12s", "twstat", "p-value"); 55 | fprintf (ofile, " %9s", "effect. n"); 56 | fprintf (ofile, "\n"); 57 | 58 | for (i = 0; i < m; ++i) { 59 | 60 | zn = nval; 61 | tail = dotwcalc (lambda + i, m - i, &tw, &zn, &zvar, minleneig); 62 | fprintf (ofile, "%4d %12.6f", i + 1, lambda[i]); 63 | if (i == 0) 64 | fprintf (ofile, "%12s", "NA"); 65 | else 66 | fprintf (ofile, "%12.6f", lambda[i] - lambda[i - 1]); 67 | if (tail >= 0.0) 68 | fprintf (ofile, " %9.3f %12.6g", tw, tail); 69 | else 70 | fprintf (ofile, " %9s %12s", "NA", "NA"); 71 | if (zn > 0.0) { 72 | fprintf (ofile, " %9.3f", zn); 73 | } 74 | else { 75 | fprintf (ofile, " %9s", "NA"); 76 | } 77 | fprintf (ofile, "\n"); 78 | } 79 | return 0; 80 | } 81 | 82 | void 83 | readcommands (int argc, char **argv) 84 | { 85 | 86 | int i; 87 | char *parname = NULL; 88 | phandle *ph; 89 | 90 | while ((i = getopt (argc, argv, "i:o:p:n:m:t:V")) != -1) { 91 | 92 | switch (i) { 93 | 94 | case 'i': 95 | iname = strdup (optarg); 96 | break; 97 | 98 | case 'o': 99 | oname = strdup (optarg); 100 | break; 101 | 102 | case 't': 103 | twxtab = strdup (optarg); 104 | break; 105 | 106 | case 'n': 107 | nval = atof (optarg); 108 | break; 109 | 110 | case 'm': 111 | minleneig = atoi (optarg); 112 | break; 113 | 114 | case 'p': 115 | parname = strdup (optarg); 116 | break; 117 | 118 | case 'V': 119 | verbose = YES; 120 | break; 121 | 122 | case '?': 123 | printf ("Usage: bad params.... \n"); 124 | fatalx ("bad params\n"); 125 | } 126 | } 127 | 128 | if (parname == NULL) 129 | return; 130 | 131 | printf ("parameter file: %s\n", parname); 132 | ph = openpars (parname); 133 | 134 | getstring (ph, "input:", &iname); 135 | getstring (ph, "output:", &oname); 136 | getdbl (ph, "nval:", &nval); 137 | getint (ph, "minleneig:", &minleneig); 138 | 139 | writepars (ph); 140 | closepars (ph); 141 | 142 | } 143 | -------------------------------------------------------------------------------- /include/kjg_gsl.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file kjg_gsl.h 3 | * @brief Augment GSL functions 4 | */ 5 | 6 | #ifndef KJG_GSL_H_ 7 | #define KJG_GSL_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | /** 14 | * Prints the matrix tab-delimited 15 | * @param *stream output file pointer 16 | * @param *m gsl_matrix to print 17 | * @param *template character template for fprintf 18 | */ 19 | 20 | void kjg_gsl_matrix_fprintf (FILE * stream, gsl_matrix * m, 21 | const char *template); 22 | 23 | /** 24 | * Prints the eigenvalues and then eigenvectors below 25 | * @param *stream output file pointer 26 | * @param *eval eigenvalues 27 | * @param *evec eigenvectors 28 | * @param *template character template for fprintf */ 29 | 30 | void kjg_gsl_evec_fprintf (FILE * stream, 31 | gsl_vector * eval, 32 | gsl_matrix * evec, const char *template); 33 | 34 | /** 35 | * Reads a matrix 36 | * @param *stream input file pointer 37 | * @param *m matrix to store 38 | */ 39 | 40 | void kjg_gsl_matrix_fscanf (FILE * stream, gsl_matrix * m); 41 | 42 | /** 43 | * Reads an evec 44 | * @param *stream input file pointer 45 | * @param *eval eigenvalues vector 46 | * @param *evec eigenvectors matrix 47 | */ 48 | 49 | int kjg_gsl_evec_fscanf (FILE * stream, gsl_vector * eval, gsl_matrix * evec); 50 | 51 | /** 52 | * Initializes random number generation. 53 | */ 54 | 55 | gsl_rng *kjg_gsl_rng_init (); 56 | 57 | /** 58 | * Initializes the matrix with random unit gaussians 59 | * @param *m matrix to be set 60 | * @param *r random number generator 61 | */ 62 | 63 | void kjg_gsl_ran_ugaussian_pair (const gsl_rng * r, double x[2]); 64 | 65 | /** Fills a matrix with unit Gaussian random variates 66 | * @param *r random number generator 67 | * @param *m matrix to be filled 68 | */ 69 | 70 | void kjg_gsl_ran_ugaussian_matrix (const gsl_rng * r, gsl_matrix * m); 71 | 72 | /** 73 | * Normalizes the matrix so the Frobenius norm is M*N 74 | * @param *m matrix to normalize 75 | * @return if error 76 | */ 77 | 78 | int kjg_gsl_matrix_frobenius_normalize (gsl_matrix * m); 79 | 80 | /** 81 | * Calculates the norm of a matrix 82 | * @param norm type of norm to return, see lapack dlange 83 | * @param *m matrix to find norm of 84 | * @return norm 85 | */ 86 | 87 | double kjg_gsl_dlange (const char norm, const gsl_matrix * m); 88 | 89 | /** 90 | * Performs the QR decomposition on the matrix and return Q in the matrix 91 | * @param *m matrix to orthogonalize 92 | */ 93 | 94 | void kjg_gsl_matrix_QR (gsl_matrix * m); 95 | 96 | /** 97 | * Calls LAPACK dgeqrf and return R and compacted Q matrix 98 | * @param *m input matrix 99 | * @param *tau see LAPACK documentation 100 | * @return LAPACK return 101 | */ 102 | 103 | int kjg_gsl_dgeqrf (gsl_matrix * m, gsl_vector * tau); 104 | 105 | /** 106 | * Calls LAPACK dorgqr to extract Q matrix 107 | * @param *m matrix with compacted Q and will store unpacked Q 108 | * @param *tau see LAPACK documentation 109 | * @return LAPACK return 110 | */ 111 | int kjg_gsl_dorgqr (gsl_matrix * m, gsl_vector * tau); 112 | 113 | /** 114 | * Calls LAPACK dgesvd, keeping u (in m) and s, discarding v^T 115 | * @param *m input matrix / where u is stored 116 | * @param *s entries of the diagonal matrix 117 | * @return LAPACK return 118 | */ 119 | int kjg_gsl_SVD (gsl_matrix * M, gsl_matrix * V, gsl_vector * S); 120 | 121 | #endif /* KJG_GSL_H_ */ 122 | -------------------------------------------------------------------------------- /src/egsubs.c: -------------------------------------------------------------------------------- 1 | #include "mcio.h" 2 | #include "egsubs.h" 3 | 4 | 5 | int 6 | makeeglist (char **eglist, int maxnumeg, Indiv ** indivmarkers, int numindivs) 7 | // old routine mkeglist 8 | { 9 | 10 | Indiv *indx; 11 | int i, k, numeg = 0; 12 | for (i = 0; i < numindivs; i++) { 13 | indx = indivmarkers[i]; 14 | if (indx->ignore) 15 | continue; 16 | k = indxindex (eglist, numeg, indx->egroup); 17 | if (k < 0) { 18 | if (numeg >= maxnumeg) { 19 | printf 20 | ("number of populations too large. Increase maxpops if you wish\n"); 21 | fatalx 22 | ("(makeeglist) You really want to analyse more than %d populations?\n", 23 | maxnumeg); 24 | } 25 | eglist[numeg] = strdup (indx->egroup); 26 | ++numeg; 27 | } 28 | } 29 | return numeg; 30 | } 31 | 32 | int 33 | mkeglist (Indiv ** indm, int numindivs, char **eglist) 34 | { 35 | Indiv *indx; 36 | int i, k, numeg = 0; 37 | for (i = 0; i < numindivs; i++) { 38 | indx = indm[i]; 39 | if (indx->ignore) 40 | continue; 41 | k = indxindex (eglist, numeg, indx->egroup); 42 | if (k < 0) { 43 | eglist[numeg] = strdup (indx->egroup); 44 | ++numeg; 45 | } 46 | } 47 | return numeg; 48 | } 49 | 50 | int 51 | loadlist_type (char **list, char *listname, int *ztypes, int off) 52 | // listname is just a list of names ... 53 | { 54 | FILE *lfile; 55 | char line[MAXSTR]; 56 | char *spt[MAXFF]; 57 | char *sx; 58 | Indiv *indx; 59 | int nsplit, i, n = 0, tt; 60 | 61 | if (listname == NULL) 62 | return 0; 63 | openit (listname, &lfile, "r"); 64 | while (fgets (line, MAXSTR, lfile) != NULL) { 65 | nsplit = splitup (line, spt, MAXFF); 66 | if (nsplit == 0) 67 | continue; 68 | sx = spt[0]; 69 | if (sx[0] == '#') { 70 | freeup (spt, nsplit); 71 | continue; 72 | } 73 | if (nsplit < 2) 74 | fatalx ("bad listname: %s\n", sx); 75 | list[n] = strdup (sx); 76 | tt = atoi (spt[1]); 77 | ztypes[n] = tt + off; 78 | ++n; 79 | freeup (spt, nsplit); 80 | } 81 | return n; 82 | } 83 | 84 | 85 | void 86 | seteglist (Indiv ** indm, int nindiv, char *eglistname) 87 | { 88 | FILE *egfile; 89 | char line[MAXSTR]; 90 | char *spt[MAXFF]; 91 | char *sx; 92 | Indiv *indx; 93 | int nsplit, i; 94 | 95 | if (eglistname == NULL) 96 | return; 97 | openit (eglistname, &egfile, "r"); 98 | while (fgets (line, MAXSTR, egfile) != NULL) { 99 | nsplit = splitup (line, spt, MAXFF); 100 | if (nsplit == 0) 101 | continue; 102 | sx = spt[0]; 103 | if (sx[0] == '#') 104 | continue; 105 | setstatus (indm, nindiv, sx); 106 | freeup (spt, nsplit); 107 | } 108 | fclose (egfile); 109 | } 110 | 111 | void 112 | seteglistv (Indiv ** indm, int nindiv, char *eglistname, int val) 113 | { 114 | FILE *egfile; 115 | char line[MAXSTR]; 116 | char *spt[MAXFF]; 117 | char *sx = NULL; 118 | Indiv *indx; 119 | int nsplit, i; 120 | 121 | if (eglistname == NULL) { 122 | setstatusv (indm, nindiv, NULL, val); 123 | } 124 | 125 | openit (eglistname, &egfile, "r"); 126 | while (fgets (line, MAXSTR, egfile) != NULL) { 127 | nsplit = splitup (line, spt, MAXFF); 128 | if (nsplit == 0) 129 | continue; 130 | sx = spt[0]; 131 | if (sx[0] == '#') 132 | continue; 133 | setstatusv (indm, nindiv, sx, val); 134 | freeup (spt, nsplit); 135 | } 136 | fclose (egfile); 137 | } 138 | -------------------------------------------------------------------------------- /POPGEN/elldemo/sicaa.evec: -------------------------------------------------------------------------------- 1 | #eigvals: 1.774 1.458 2 | GEO-002 0.124960 -0.019309 Georgian 3 | GEO-005 0.098987 -0.025584 Georgian 4 | GEO-010 0.124701 -0.043397 Georgian 5 | GEO-015 0.131265 -0.027730 Georgian 6 | GEO-020 0.135953 -0.053088 Georgian 7 | GEO-028 0.134414 -0.036569 Georgian 8 | GEO-031 0.157799 -0.025231 Georgian 9 | GEO-032 0.146574 -0.026196 Georgian 10 | GEO-039 0.158120 -0.022020 Georgian 11 | GEO-051 0.144490 -0.042876 Georgian 12 | GEO-061 0.153842 -0.030906 Georgian 13 | GEO-082 0.158142 -0.028626 Georgian 14 | GEO001 0.156173 -0.043278 Georgian 15 | HGDP00666 -0.116760 -0.030823 Sardinian 16 | HGDP00667 -0.116709 -0.017192 Sardinian 17 | HGDP00668 -0.137047 -0.027317 Sardinian 18 | HGDP00669 -0.126519 -0.024759 Sardinian 19 | HGDP00670 -0.135795 -0.031889 Sardinian 20 | HGDP00671 -0.128935 -0.029534 Sardinian 21 | HGDP00672 -0.124405 -0.032339 Sardinian 22 | HGDP00673 -0.120324 -0.035232 Sardinian 23 | HGDP00674 -0.137120 -0.026784 Sardinian 24 | HGDP01062 -0.119201 -0.035362 Sardinian 25 | HGDP01063 -0.141085 -0.036116 Sardinian 26 | HGDP01064 -0.153843 -0.049779 Sardinian 27 | HGDP01065 -0.145904 -0.024355 Sardinian 28 | HGDP01066 -0.157103 -0.027543 Sardinian 29 | HGDP01067 -0.144155 -0.050578 Sardinian 30 | HGDP01068 -0.130147 -0.039257 Sardinian 31 | HGDP01069 -0.140198 -0.030983 Sardinian 32 | HGDP01070 -0.138745 -0.032343 Sardinian 33 | HGDP01071 -0.116760 -0.041106 Sardinian 34 | HGDP01072 -0.127844 -0.040682 Sardinian 35 | HGDP01073 -0.143137 -0.035532 Sardinian 36 | HGDP01074 -0.138522 -0.032142 Sardinian 37 | HGDP01075 -0.145153 -0.023029 Sardinian 38 | HGDP01076 -0.136974 -0.040912 Sardinian 39 | HGDP01077 -0.119744 -0.032696 Sardinian 40 | HGDP01078 -0.124987 -0.031961 Sardinian 41 | HGDP01079 -0.128279 -0.021665 Sardinian 42 | LithuanianF1 0.030379 0.322035 Lithuanian 43 | lithuania3 0.031552 0.282867 Lithuanian 44 | lithuania10 0.023618 0.342455 Lithuanian 45 | lithuania9 0.032949 0.338347 Lithuanian 46 | LithuanianA1 0.023348 0.316050 Lithuanian 47 | LithuanianE2 0.031801 0.273176 Lithuanian 48 | lithuania1 0.015194 0.254946 Lithuanian 49 | lithuania8 0.030478 0.305058 Lithuanian 50 | lithuania2 0.032994 0.334543 Lithuanian 51 | LithuanianD1 0.016299 0.282471 Lithuanian 52 | mg43 0.159324 -0.022091 Georgian 53 | mg47 0.140898 -0.038999 Georgian 54 | mg22 0.138969 -0.035963 Georgian 55 | mg49 0.153820 -0.036044 Georgian 56 | mg23 0.140328 -0.032225 Georgian 57 | mg62 0.158555 -0.026727 Georgian 58 | mg27 0.154282 -0.028294 Georgian 59 | mg31 0.141187 -0.024043 Georgian 60 | mg34 0.149388 -0.030698 Georgian 61 | mg40 0.162562 -0.032568 Georgian 62 | I3122 -0.094106 -0.019738 Sicily_EBA 63 | I11442 -0.061346 -0.019553 Sicily_EBA 64 | I7796 -0.046874 -0.045908 Sicily_EBA 65 | I7800 -0.066847 -0.032000 Sicily_EBA 66 | I7807 -0.073119 -0.032957 Sicily_EBA 67 | -------------------------------------------------------------------------------- /include/strsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int splitup (char *strin, char *strpt[],int maxpt) ; 4 | int splitupx(char *strin, char **spt, int maxpt, char splitc) ; 5 | int splitupwxbuff(char *strin, char **spt, int maxpt, char *bigbuff, int bigbufflen) ; 6 | int splitupxbuff(char *strin, char **spt, int maxpt, char splitc, char *bigbuff, int bigbufflen) ; 7 | int oldsplitup (char *strin, char *strpt[],int maxpt) ; 8 | void freeup (char *strpt[],int numpt) ; 9 | int split1 (char *strin, char *strpt[], char splitc); 10 | int first_word(char *string, char *word, char *rest) ; 11 | char *fnwhite (char *ss) ; 12 | char *fwhite (char *ss) ; 13 | char *ftab (char *ss) ; 14 | int NPisnumber (char c) ; 15 | int isnumword (char *str) ; 16 | void ffprint (FILE *fff, char *fmt, ...) ; 17 | void enuf( char *fmt, ...) ; 18 | void fatalx( char *fmt, ...) ; 19 | int docommand( char *fmt, ...) ; 20 | long seednum() ; 21 | void printbl(int n) ; 22 | void printnl() ; 23 | void striplead(char *sss, char c) ; 24 | void striptrail(char *sss, char c) ; 25 | void catx(char *sout, char **spt, int n) ; 26 | void catxx(char *sout, char **spt, int n) ; 27 | void catxc(char *sout, char **spt, int n, char c) ; 28 | void makedfn(char *dirname, char *fname, char *outname, int maxstr) ; 29 | int substring (char **ap, char *inx, char *outx) ; 30 | int mapstrings(char **pstr, char **insub, char **outsub, int n) ; 31 | int upstring (char *ss) ; 32 | int numcols (char *name) ; 33 | int numlines(char *name) ; 34 | int openit_trap (char *name, FILE ** fff, char *type); 35 | void openit(char *name, FILE **fff, char *type) ; 36 | int ftest(char *aname) ; 37 | void fcheckr(char *name) ; 38 | void fcheckw(char *name) ; 39 | int getxx(double **xx, int maxrow, int numcol, char *fname) ; 40 | int getss(char **ss, char *fname) ; 41 | int loadlist(char **list, char *listname) ; // with dup check 42 | void printdups(char **list, int n) ; 43 | int checkdup(char **list, int n) ; 44 | double clocktime() ; // cpu time in seconds 45 | void crevcomp(char *sout, char *sin) ; 46 | int indxstring(char **namelist, int len, char *strid) ; 47 | int indxstringr(char **namelist, int len, char *strid) ; 48 | char *strstrx(char *s1, char *s2) ; // case insensitive strstr 49 | int getxxnames(char ***pnames, double **xx, int maxrow, int numcol, char *fname); 50 | int getjjnames(char ***pnames, int **xx, int maxrow, int numcol, char *fname); 51 | int getxxnamesf(char ***pnames, double **xx, int maxrow, int numcol, FILE *fff) ; 52 | int getnameslohi(char ****pnames, int maxrow, int numcol, char *fname, int lo, int hi) ; 53 | int getnamesstripcolon(char ****pnames, int maxrow, int numcol, char *fname, int lo, int hi) ; 54 | int getnames(char ****pnames, int maxrow, int numcol, char *fname) ; 55 | char num2iub (int num) ; 56 | char revchar(char c) ; 57 | int iub2num(char c) ; 58 | char num2base (int num) ; 59 | int base2num(char c) ; 60 | char *int_string(int a, int len, int base) ; 61 | char *binary_string(int a, int len) ; 62 | int string_binary(char *sx) ; 63 | void freestring (char **ss) ; 64 | void copystrings(char **sa, char **sb, int n) ; 65 | void printstringsw(char **ss, int n, int slen, int width) ; 66 | void printstrings(char **ss, int n) ; 67 | void printstringsx(char **ss, int n) ; 68 | int ridfile(char *fname) ; 69 | char compbase(char x) ; 70 | void mkupper(char *sx) ; 71 | void mklower(char *sx) ; 72 | int iubdekode(char *a, char iub) ; 73 | int isiub(char iub) ; 74 | int isiub2(char iub) ; 75 | int iubcbases(char *cbases, char iub) ; 76 | int ishet(char c) ; 77 | int cttype(char cc) ; 78 | int char2int(char cc) ; 79 | char int2char(int x) ; 80 | void chomp(char *str) ; 81 | 82 | int numcmatch(char *cc, int len, char c) ; 83 | int numcnomatch(char *cc, int len, char c) ; 84 | char *strnotchar(char *s, char c) ; 85 | char *findupper(char *s) ; 86 | char *fgetstrap(char *buff, int maxlen, FILE *fff, int *ret) ; 87 | char readtonl(FILE *fff) ; 88 | int filehash(char *name) ; 89 | char *mytemp (char *qqq) ; 90 | void printslurmenv () ; 91 | int getfline(char *ss, char *fname, int maxstr) ; 92 | int copyfs(char *infile, FILE *fff) ; 93 | int getxxq(double **xx, int maxrow, int numcol, char *fname) ; 94 | int numcolsq (char *name) ; 95 | int getdata(char *buff, int nbytes, char *fname) ; 96 | int putdata(char *buff, int nbytes, char *fname) ; 97 | void writestrings(char *fname, char **ss, int n) ; 98 | 99 | 100 | #define ZALLOC(item,n,type) if ((item = (type *)calloc((n),sizeof(type))) == NULL) \ 101 | fatalx("Unable to allocate %ld unit(s) for item \n", (long) n) 102 | 103 | #undef MAX 104 | #undef MIN 105 | 106 | #define MAX(a,b) ( (a) < (b) ? (b) : (a) ) 107 | #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) 108 | #define YES 1 109 | #define NO 0 110 | #define TRUE 1 111 | #define FALSE 0 112 | #define CNULL '\0' 113 | #define CNL '\n' 114 | #define CTAB '\t' 115 | 116 | -------------------------------------------------------------------------------- /src/h2d.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | extern int verbose; 10 | extern long rlen, packlen ; 11 | extern char *packgenos ; 12 | 13 | static char *pack2 ; 14 | 15 | static int pseudodip = YES ; // for hap to dip. 16 | 17 | int 18 | mkindh2d (Indiv ** indivmarkers, Indiv *** pindm2, int numindivs) 19 | { 20 | char ss[50]; 21 | Indiv *indx, **indm2, *indp; 22 | int n, len, k; 23 | int numind2; 24 | 25 | numind2 = numindivs / 2; 26 | ZALLOC (*pindm2, numind2, Indiv *); 27 | indm2 = *pindm2; 28 | n = 0; 29 | for (k = 0; k < numindivs; k++) { 30 | indx = indivmarkers[k]; 31 | strcpy (ss, indx->ID); 32 | len = strlen (ss); 33 | if (ss[len - 1] != 'A') 34 | continue; 35 | ss[len - 2] = CNULL; 36 | ZALLOC (indm2[n], 1, Indiv); 37 | indp = indm2[n]; 38 | *indp = *indx; 39 | strcpy (indp->ID, ss); 40 | ++n; 41 | } 42 | if (n != numind2) 43 | fatalx ("(mkindh2d) bug\n"); 44 | return n; 45 | } 46 | 47 | void 48 | remaph2d (SNP ** snpmarkers, int numsnps, Indiv ** indivmarkers, 49 | Indiv ** indm2, int numindivs, int numind2) 50 | { 51 | 52 | int *g1, *g2; 53 | int *x1, *x2; 54 | int *tind, tt, t, i, j, k, j1, j2; 55 | Indiv *indx; 56 | SNP *cupt; 57 | char s1[50], s2[50]; 58 | 59 | ZALLOC (g2, numind2, int); 60 | ZALLOC (g1, numindivs, int); 61 | ZALLOC (x1, numindivs, int); 62 | ZALLOC (x2, numindivs, int); 63 | 64 | for (k = 0; k < numind2; ++k) { 65 | indx = indm2[k]; 66 | sprintf (s1, "%s:A", indx->ID); 67 | sprintf (s2, "%s:B", indx->ID); 68 | t = x1[k] = indindex (indivmarkers, numindivs, s1); 69 | if (t < 0) { 70 | sprintf (s1, "%s_A", indx->ID); 71 | sprintf (s2, "%s_B", indx->ID); 72 | t = x1[k] = indindex (indivmarkers, numindivs, s1); 73 | } 74 | if (t < 0) 75 | fatalx ("bad newindiv: %s\n", indx->ID); 76 | t = x2[k] = indindex (indivmarkers, numindivs, s2); 77 | if (t < 0) 78 | fatalx ("bad newindiv: %s\n", indx->ID); 79 | } 80 | 81 | for (i = 0; i < numsnps; i++) { 82 | cupt = snpmarkers[i]; 83 | 84 | for (j = 0; j < numind2; ++j) { 85 | t = x1[j]; 86 | g1[j] = getgtypes (cupt, t); 87 | t = x2[j]; 88 | g2[j] = getgtypes (cupt, t); 89 | tt = -1; 90 | if ((g1[j] >= 0) && (g2[j] >= 0)) { 91 | tt = g1[j] + g2[j]; 92 | if (pseudodip) tt /= 2 ; 93 | } 94 | putgtypes (cupt, j, tt); 95 | } 96 | } 97 | 98 | free (g1); 99 | free (g2); 100 | free (x1); 101 | free (x2); 102 | 103 | } 104 | 105 | int mkindd2h (Indiv ** indivmarkers, Indiv *** pindm2, int numindivs) 106 | { 107 | char ss[50], s1[50], s2[50] ; 108 | Indiv *indx, **indm2, *indp, *ind1, *ind2 ; 109 | int n, len, k; 110 | int numind2; 111 | 112 | numind2 = numindivs * 2; 113 | ZALLOC (*pindm2, numind2, Indiv *); 114 | indm2 = *pindm2; 115 | n = 0; 116 | for (k = 0; k < numindivs; k++) { 117 | indx = indivmarkers[k]; 118 | strcpy (s1, indx->ID); 119 | strcat (s1, ":A") ; 120 | strcpy (s2, indx->ID); 121 | strcat (s2, ":B") ; 122 | if (strlen(s1) >=IDSIZE) fatalx("d2h overflow: %s\n", indx -> ID) ; 123 | ZALLOC (indm2[n], 1, Indiv); 124 | indp = indm2[n]; 125 | *indp = *indx; 126 | strcpy (indp->ID, s1); 127 | ++n; 128 | ZALLOC (indm2[n], 1, Indiv); 129 | indp = indm2[n]; 130 | *indp = *indx; 131 | strcpy (indp->ID, s2); 132 | ++n; 133 | } 134 | if (n != numind2) fatalx ("(mkindh2d) bug\n"); 135 | return n; 136 | } 137 | 138 | void 139 | remapd2h (SNP ** snpmarkers, int numsnps, Indiv ** indivmarkers, 140 | Indiv ** indm2, int numindivs, int numind2) 141 | { 142 | 143 | int g1, g2, gg; 144 | int *xx, *x2; 145 | int *tind, tt, t, i, j, k, j1, j2, x; 146 | Indiv *indx; 147 | SNP *cupt; 148 | char s1[50], s2[50]; 149 | long rlen2, packlen2 ; 150 | char *pbuff ; 151 | 152 | rlen2 = rlen * 2 ; 153 | packlen2 = packlen * 2 ; 154 | 155 | ZALLOC(pack2, packlen2, char) ; 156 | pbuff = pack2 ; 157 | 158 | ZALLOC (xx, numind2, int); 159 | ZALLOC (x2, numindivs, int); 160 | 161 | x = 0 ; 162 | for (i=0 ; i < numsnps; ++i) { 163 | cupt = snpmarkers[i] ; 164 | ivclear(xx, -1, numind2) ; 165 | for (k=0 ; k < numindivs; ++k) { 166 | gg = getgtypes(cupt, k) ; 167 | if (gg<0) gg = 3 ; 168 | g1 = g2 = gg ; 169 | if (gg == 1) { 170 | x = ranmod(2) ; 171 | g1 = 2*x ; 172 | g2 = 2*(1-x) ; 173 | } 174 | xx[2*k] = g1 ; 175 | xx[2*k+1] = g2 ; 176 | } 177 | for (k=0; k pbuff = pbuff ; 182 | pbuff += rlen2 ; 183 | } 184 | 185 | free (xx); 186 | free (x2); 187 | free (packgenos) ; 188 | packgenos = pack2 ; 189 | rlen = rlen2 ; 190 | packlen = packlen2 ; 191 | 192 | } 193 | 194 | -------------------------------------------------------------------------------- /src/ksrc/kjg_fpca.c: -------------------------------------------------------------------------------- 1 | /* 2 | * kjg_fpca.c 3 | * 4 | * Created on: Apr 28, 2014 5 | * Author: Kevin 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "kjg_fpca.h" 16 | #include "kjg_gsl.h" 17 | 18 | #include "admutils.h" 19 | #include "../gval.h" 20 | 21 | size_t KJG_FPCA_ROWS = 256; 22 | 23 | void 24 | kjg_fpca (size_t K, size_t L, size_t I, double *eval, double *evec) 25 | { 26 | if (K >= L) 27 | exit (1); 28 | if (I == 0) 29 | exit (1); 30 | 31 | size_t m = get_ncols (); 32 | size_t n = get_nrows (); 33 | 34 | // PART A - compute Q such that X ~ Q * (Q^T) * X 35 | gsl_matrix *G1 = gsl_matrix_alloc (n, L); 36 | gsl_matrix *G2 = gsl_matrix_alloc (n, L); 37 | gsl_matrix *Q = gsl_matrix_alloc (m, (I + 1) * L); 38 | gsl_matrix *Gswap; 39 | 40 | gsl_rng *r = kjg_gsl_rng_init (); 41 | kjg_gsl_ran_ugaussian_matrix (r, G1); 42 | gsl_rng_free (r); 43 | 44 | size_t i; 45 | for (i = 0; i < I; i++) 46 | { 47 | gsl_matrix_view Qi = gsl_matrix_submatrix (Q, 0, i * L, m, L); 48 | 49 | // do the multiplication 50 | kjg_fpca_XTXA (G1, &Qi.matrix, G2); 51 | 52 | // scale to prevent G2 from blowing up 53 | gsl_matrix_scale (G2, 1.0 / m); 54 | 55 | Gswap = G2; 56 | G2 = G1; 57 | G1 = Gswap; 58 | } 59 | 60 | gsl_matrix_view Qi = gsl_matrix_submatrix (Q, 0, I * L, m, L); 61 | kjg_fpca_XA (G1, &Qi.matrix); 62 | 63 | { 64 | gsl_matrix *V = gsl_matrix_alloc (Q->size2, Q->size2); 65 | gsl_vector *S = gsl_vector_alloc (Q->size2); 66 | 67 | kjg_gsl_SVD (Q, V, S); 68 | 69 | gsl_matrix_free (V); 70 | gsl_vector_free (S); 71 | } 72 | 73 | // kjg_gsl_matrix_QR(Q); // QR decomposition is less accurate than SVD 74 | 75 | gsl_matrix_free (G1); 76 | gsl_matrix_free (G2); 77 | 78 | // PART B - compute B matrix, take SVD and return 79 | gsl_matrix *B = gsl_matrix_alloc (n, (I + 1) * L); 80 | kjg_fpca_XTB (Q, B); 81 | 82 | gsl_matrix *Utilda = gsl_matrix_alloc ((I + 1) * L, (I + 1) * L); 83 | gsl_vector *Stilda = gsl_vector_alloc ((I + 1) * L); 84 | 85 | kjg_gsl_SVD (B, Utilda, Stilda); 86 | 87 | gsl_matrix_view Vk = gsl_matrix_submatrix (B, 0, 0, n, K); 88 | gsl_matrix_view evec_view = gsl_matrix_view_array (evec, n, K); 89 | gsl_matrix_memcpy (&evec_view.matrix, &Vk.matrix); 90 | 91 | gsl_vector_view Sk = gsl_vector_subvector (Stilda, 0, K); 92 | gsl_vector_view eval_view = gsl_vector_view_array (eval, K); 93 | gsl_vector_mul (&Sk.vector, &Sk.vector); 94 | gsl_vector_scale (&Sk.vector, 1.0 / m); 95 | gsl_vector_memcpy (&eval_view.vector, &Sk.vector); 96 | 97 | gsl_matrix_free (Q); 98 | gsl_matrix_free (B); 99 | gsl_matrix_free (Utilda); 100 | gsl_vector_free (Stilda); 101 | } 102 | 103 | void 104 | kjg_fpca_XTXA (const gsl_matrix * A1, gsl_matrix * B, gsl_matrix * A2) 105 | { 106 | size_t m = get_ncols (); 107 | size_t n = get_nrows (); 108 | 109 | size_t i, r; // row index 110 | double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS); // normalized 111 | 112 | gsl_matrix_view Bi, Xi; 113 | 114 | gsl_matrix_set_zero (A2); 115 | 116 | for (i = 0; i < m; i += KJG_FPCA_ROWS) 117 | { 118 | r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y); 119 | Xi = gsl_matrix_view_array (Y, r, n); 120 | Bi = gsl_matrix_submatrix (B, i, 0, r, B->size2); 121 | gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1, &Xi.matrix, A1, 0, 122 | &Bi.matrix); 123 | gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1, &Xi.matrix, &Bi.matrix, 124 | 1, A2); 125 | } 126 | 127 | free (Y); 128 | } 129 | 130 | void 131 | kjg_fpca_XA (const gsl_matrix * A, gsl_matrix * B) 132 | { 133 | size_t n = get_nrows (); 134 | size_t m = get_ncols (); 135 | 136 | size_t i, r; 137 | double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS); 138 | 139 | gsl_matrix_view Hmat, Xmat; 140 | 141 | gsl_matrix_set_zero (B); 142 | 143 | for (i = 0; i < m; i += KJG_FPCA_ROWS) 144 | { 145 | r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y); 146 | Xmat = gsl_matrix_view_array (Y, r, n); 147 | Hmat = gsl_matrix_submatrix (B, i, 0, r, B->size2); 148 | gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1, &Xmat.matrix, A, 0, 149 | &Hmat.matrix); 150 | } 151 | 152 | free (Y); 153 | } 154 | 155 | void 156 | kjg_fpca_XTB (const gsl_matrix * B, gsl_matrix * A) 157 | { 158 | size_t n = get_nrows (); 159 | size_t m = get_ncols (); 160 | 161 | size_t i, r; 162 | double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS); 163 | gsl_matrix_view Xmat; 164 | 165 | gsl_matrix_set_zero (A); 166 | 167 | for (i = 0; i < m; i += KJG_FPCA_ROWS) 168 | { 169 | r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y); 170 | Xmat = gsl_matrix_view_array (Y, r, n); 171 | gsl_matrix_const_view Hmat = gsl_matrix_const_submatrix (B, i, 0, r, 172 | B->size2); 173 | gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1, &Xmat.matrix, &Hmat.matrix, 174 | 1, A); 175 | } 176 | 177 | free (Y); 178 | } 179 | -------------------------------------------------------------------------------- /src/ksrc/kjg_gsl.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include "kjg_gsl.h" 12 | 13 | void 14 | kjg_gsl_matrix_fprintf (FILE * stream, gsl_matrix * m, const char *template) 15 | { 16 | size_t i, j; 17 | for (i = 0; i < m->size1; i++) 18 | { 19 | fprintf (stream, template, gsl_matrix_get (m, i, 0)); 20 | for (j = 1; j < m->size2; j++) 21 | { 22 | fprintf (stream, "\t"); 23 | fprintf (stream, template, gsl_matrix_get (m, i, j)); 24 | } 25 | fprintf (stream, "\n"); 26 | } 27 | } 28 | 29 | void 30 | kjg_gsl_matrix_fscanf (FILE * stream, gsl_matrix * m) 31 | { 32 | size_t i, j; 33 | double x; 34 | for (i = 0; i < m->size1; i++) 35 | { 36 | for (j = 0; j < m->size2; j++) 37 | { 38 | fscanf (stream, "%lg", &x); 39 | gsl_matrix_set (m, i, j, x); 40 | } 41 | } 42 | } 43 | 44 | void 45 | kjg_gsl_evec_fprintf (FILE * stream, 46 | gsl_vector * eval, 47 | gsl_matrix * evec, const char *template) 48 | { 49 | size_t i, j; 50 | fprintf (stream, "#"); 51 | fprintf (stream, template, gsl_vector_get (eval, 0)); 52 | for (i = 1; i < eval->size; i++) 53 | { 54 | fprintf (stream, "\t"); 55 | fprintf (stream, template, gsl_vector_get (eval, i)); 56 | } 57 | fprintf (stream, "\n"); 58 | kjg_gsl_matrix_fprintf (stream, evec, template); 59 | } 60 | 61 | int 62 | kjg_gsl_evec_fscanf (FILE * stream, gsl_vector * eval, gsl_matrix * evec) 63 | { 64 | size_t i, j; 65 | int r; 66 | double x; 67 | 68 | r = fscanf (stream, "#%lg", &x); 69 | if (r != 1) 70 | return (r); 71 | gsl_vector_set (eval, 0, x); 72 | 73 | for (i = 1; i < eval->size; i++) 74 | { 75 | r = fscanf (stream, "%lg", &x); 76 | if (r != 1) 77 | return (r); 78 | gsl_vector_set (eval, i, x); 79 | } 80 | 81 | for (i = 0; i < evec->size1; i++) 82 | { 83 | for (j = 0; j < evec->size2; j++) 84 | { 85 | r = fscanf (stream, "%lg", &x); 86 | if (r != 1) 87 | return (r); 88 | gsl_matrix_set (evec, i, j, x); 89 | } 90 | } 91 | 92 | return (0); 93 | } 94 | 95 | gsl_rng * 96 | kjg_gsl_rng_init () 97 | { 98 | const gsl_rng_type *T; 99 | gsl_rng *r; 100 | extern long seed; 101 | 102 | gsl_rng_env_setup (); 103 | 104 | gsl_rng_default_seed = seed; 105 | 106 | T = gsl_rng_default; 107 | r = gsl_rng_alloc (T); 108 | 109 | // fprintf (stderr, "generator type: %s\n", gsl_rng_name (r)); 110 | // fprintf (stderr, "seed = %lu\n", gsl_rng_default_seed); 111 | 112 | return (r); 113 | } 114 | 115 | int 116 | kjg_gsl_matrix_frobenius_normalize (gsl_matrix * m) 117 | { 118 | double s = kjg_gsl_dlange ('F', m); 119 | double d = m->size1 * m->size2; 120 | return (gsl_matrix_scale (m, d / s)); 121 | } 122 | 123 | double 124 | kjg_gsl_dlange (const char norm, const gsl_matrix * m) 125 | { 126 | return (LAPACKE_dlange (LAPACK_ROW_MAJOR, norm, m->size1, m->size2, m->data, 127 | m->tda)); 128 | } 129 | 130 | int 131 | kjg_gsl_dgeqrf (gsl_matrix * m, gsl_vector * tau) 132 | { 133 | return (LAPACKE_dgeqrf (LAPACK_ROW_MAJOR, m->size1, m->size2, m->data, 134 | m->tda, tau->data)); 135 | } 136 | 137 | int 138 | kjg_gsl_dorgqr (gsl_matrix * m, gsl_vector * tau) 139 | { 140 | return (LAPACKE_dorgqr (LAPACK_ROW_MAJOR, m->size2, m->size2, m->size2, 141 | m->data, m->tda, tau->data)); 142 | } 143 | 144 | void 145 | kjg_gsl_ran_ugaussian_pair (const gsl_rng * r, double x[2]) 146 | { 147 | double r2; 148 | 149 | do 150 | { 151 | /* choose x,y in uniform square (-1,-1) to (+1,+1) */ 152 | x[0] = -1 + 2 * gsl_rng_uniform_pos (r); 153 | x[1] = -1 + 2 * gsl_rng_uniform_pos (r); 154 | 155 | /* see if it is in the unit circle */ 156 | r2 = x[0] * x[0] + x[1] * x[1]; 157 | } 158 | while (r2 > 1.0 || r2 == 0); 159 | 160 | r2 = sqrt (-2.0 * log (r2) / r2); 161 | 162 | x[0] *= r2; 163 | x[1] *= r2; 164 | } 165 | 166 | void 167 | kjg_gsl_ran_ugaussian_matrix (const gsl_rng * r, gsl_matrix * m) 168 | { 169 | size_t i, j; 170 | double *data; 171 | double x, y, r2; 172 | 173 | for (i = 0; i < m->size1; i++) 174 | { 175 | data = gsl_matrix_ptr (m, i, 0); 176 | 177 | for (j = 0; j < m->size2 - 1; j += 2) 178 | { 179 | kjg_gsl_ran_ugaussian_pair (r, data); 180 | data += 2; 181 | } 182 | 183 | if (m->size2 % 2) 184 | *data = gsl_rng_uniform_pos (r); 185 | } 186 | } 187 | 188 | void 189 | kjg_gsl_matrix_QR (gsl_matrix * m) 190 | { 191 | gsl_vector *tau = gsl_vector_alloc (m->size2); 192 | kjg_gsl_dgeqrf (m, tau); 193 | kjg_gsl_dorgqr (m, tau); 194 | gsl_vector_free (tau); 195 | } 196 | 197 | int 198 | kjg_gsl_SVD (gsl_matrix * M, gsl_matrix * V, gsl_vector * S) 199 | { 200 | size_t big_enough = M->size1 + V->size2; 201 | double *superb = malloc (big_enough * sizeof (double)); 202 | double *U; 203 | int info = LAPACKE_dgesvd (LAPACK_ROW_MAJOR, // row major 204 | 'O', 'S', M->size1, M->size2, M->data, M->tda, 205 | S->data, U, 206 | big_enough, V->data, V->tda, superb); 207 | free (superb); 208 | return (info); 209 | } 210 | -------------------------------------------------------------------------------- /bin/smartpca.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use Getopt::Std ; 4 | use File::Basename ; 5 | 6 | ### process flags 7 | # -w poplist is compute eigenvectors using populations in poplist, then project 8 | # -y poplistplot is use populations in poplistplot for plot 9 | # -z badsnpfile is use badsnpname: badsnpfile in call to smartpca 10 | my @flaglist = ("i","a","b","k","o","p","e","l","m","q","t","s","w","y","z"); 11 | $x = @ARGV; 12 | for($n=0; $n<$x; $n++) 13 | { 14 | foreach $flag (@flaglist) 15 | { 16 | if($ARGV[$n] eq "-$flag") { $specified{$flag} = 1; } 17 | } 18 | } 19 | foreach $flag ("i","a","b","o","p","e","l") 20 | { 21 | unless($specified{$flag}) { die("OOPS -$flag flag not specified"); } 22 | } 23 | getopts('i:a:b:k:o:p:e:l:m:t:s:w:y:z:q:',\%opts); 24 | $i = $opts{"i"}; 25 | $a = $opts{"a"}; 26 | $b = $opts{"b"}; 27 | $k = 10; if($specified{"k"}) { $k = $opts{"k"}; } 28 | $o = $opts{"o"}; 29 | $q = 0; if($specified{"q"}) { $q = $opts{"q"}; } 30 | $p = $opts{"p"}; 31 | $e = $opts{"e"}; 32 | $l = $opts{"l"}; 33 | $m = 5; if($specified{"m"}) { $m = $opts{"m"}; } 34 | $t = 10; if($specified{"t"}) { $t = $opts{"t"}; } 35 | $s = 6.0; if($specified{"s"}) { $s = $opts{"s"}; } 36 | if($specified{"w"}) { $w = $opts{"w"}; } 37 | if($specified{"y"}) { $y = $opts{"y"}; } 38 | if($specified{"z"}) { $z = $opts{"z"}; } 39 | 40 | ### run smartpca 41 | $parfile = "$o.par"; 42 | $evec = "$o.evec"; 43 | open(PARFILE,">$parfile") || die("OOPS couldn't open file $parfile for writing"); 44 | print PARFILE ("genotypename: $i\n"); 45 | print PARFILE ("snpname: $a\n"); 46 | print PARFILE ("indivname: $b\n"); 47 | print PARFILE ("evecoutname: $evec\n"); 48 | print PARFILE ("evaloutname: $e\n"); 49 | print PARFILE ("altnormstyle: NO\n"); 50 | print PARFILE ("numoutevec: $k\n"); 51 | print PARFILE ("numoutlieriter: $m\n"); 52 | print PARFILE ("numoutlierevec: $t\n"); 53 | print PARFILE ("outliersigmathresh: $s\n"); 54 | print PARFILE ("qtmode: $q\n"); 55 | if($specified{"w"}) { print PARFILE ("poplistname: $w\n"); } 56 | if($specified{"z"}) { print PARFILE ("badsnpname: $z\n"); } 57 | close(PARFILE); 58 | $command = "smartpca"; # MUST put bin directory in path 59 | $command .= " -p $parfile >$l"; 60 | print("$command\n"); 61 | system("$command"); 62 | 63 | ### make string of populations for ploteig 64 | $popstring = ""; 65 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); 66 | while($line = ) 67 | { 68 | chomp($line); 69 | my @array = split(/[\t ]+/,$line); 70 | $x = @array; 71 | if($array[1] =~ /eigvals/) { next; } # eigvals header line 72 | $pop = $array[$x-1]; 73 | if($popfound{$pop}) { next; } 74 | $popstring = $popstring . "$pop:"; 75 | $popfound{$pop} = 1; 76 | } 77 | close(EVEC); 78 | chop($popstring); # remove last ":" 79 | 80 | if($specified{"y"}) 81 | { 82 | ### make string of populations for ploteig based on -y flag input 83 | $popstring = ""; 84 | open(Y,$y) || die("COF"); 85 | while($line = ) 86 | { 87 | chomp($line); 88 | $popstring .= "$line:"; 89 | } 90 | chop($popstring); 91 | } 92 | 93 | ### cax ploteig 94 | $command = "ploteig"; # MUST put bin directory in path 95 | $command .= " -i $evec"; 96 | $command .= " -c 1:2 "; 97 | $command .= " -p $popstring "; 98 | $command .= " -x "; 99 | $command .= " -y "; 100 | $command .= " -o $p.xtxt "; # must end in .xtxt 101 | print("$command\n"); 102 | system("$command"); 103 | 104 | ### translate .evec file to .pca file expected by eigenstrat program 105 | ### Note: .evec file does not contain entries for outliers 106 | ### .pca file does contain entries (set to all 0.0) for outliers 107 | 108 | # ----- If this looks like a PLINK run, call the PLINK kludge 109 | if ( $i =~ m/\.ped$/ || $i =~ m/\.PED/ ) { 110 | $command = "evec2pca-ped.perl $k $evec $b $o"; 111 | } 112 | else { 113 | $command = "evec2pca.perl $k $evec $b $o"; 114 | } 115 | print("$command\n"); 116 | system("$command"); 117 | 118 | ### If labels are Case and Control only, compute correlations between 119 | ### Case/Control status and each eigenvector. Append to logfile. 120 | if(($popstring eq "Case:Control") || ($popstring eq "Control:Case")) 121 | { 122 | open(LOG,">>$l") || die("OOPS couldn't open file $l for appending"); 123 | print LOG ("\n"); 124 | for($x=0; $x<$k; $x++) # compute correlation for evec $x 125 | { 126 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading"); 127 | $sum1=0; $sumx=0; $sumxx=0; $sumy=0; $sumyy=0; $sumxy=0; 128 | $line = ; chomp($line); # eigvals line 129 | while($line = ) 130 | { 131 | chomp($line); 132 | my @array = split(/[\t ]+/,$line); 133 | $this = $array[2+$x]; 134 | $sumy += $this; 135 | $sumyy += $this*$this; 136 | $sum1 += 1; 137 | if($line =~ /Case/) # Case is 1, Control is 0 138 | { 139 | $sumx += 1; 140 | $sumxx += 1; 141 | $sumxy += $this; 142 | } 143 | } 144 | close(EVEC); 145 | $meanx = $sumx/$sum1; 146 | $meany = $sumy/$sum1; 147 | if($sum1 == 0) { next; } 148 | $sdevx = sqrt($sumxx/$sum1 - $meanx*$meanx); 149 | $sdevy = sqrt($sumyy/$sum1 - $meany*$meany); 150 | if($sdevx * $sdevy == 0) { next; } 151 | $corr = ($sumxy/$sum1) / ($sdevx*$sdevy); 152 | $x1 = $x+1; 153 | printf LOG ("Correlation between eigenvector $x1 (of $k) and Case/Control status is %.03f\n",$corr); 154 | } 155 | close(LOG); 156 | } 157 | -------------------------------------------------------------------------------- /src/gval.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "admutils.h" 15 | #include "mcio.h" 16 | #include "gval.h" 17 | 18 | static SNP **xxsnps = NULL; 19 | static Indiv **xindivmarkers = NULL; 20 | static int xnrows, xncols; 21 | static int xnumindivs; 22 | static int *xxindex = NULL; 23 | static double *xmean, *xfancy; 24 | static double **gtable = NULL; 25 | 26 | int getcolxz (double *xcol, SNP * cupt, int *xindex, int *xtypes, 27 | int nrows, int col, double *xmean, double *xfancy, int *n0, 28 | int *n1); 29 | 30 | void 31 | setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs, 32 | int *xindex, int *xtypes, int ncols) 33 | { 34 | 35 | double *cc; 36 | int t, n0, n1, i, k, col; 37 | SNP *cupt; 38 | double mean, y; 39 | 40 | unsetgval (); 41 | 42 | xxsnps = xsnps; 43 | xnrows = nrows; 44 | xncols = ncols; 45 | xindivmarkers = indivmarkers; 46 | xnumindivs = numindivs; 47 | xxindex = xindex; 48 | 49 | for (i = 1; i < nrows; i++) { 50 | if (xxindex[i] < xxindex[i - 1]) { 51 | fprintf (stderr, "xindex not sorted\n"); 52 | exit (1); 53 | } 54 | } 55 | 56 | ZALLOC (cc, nrows, double); 57 | ZALLOC (xmean, ncols, double); 58 | ZALLOC (xfancy, ncols, double); 59 | vclear (xfancy, 1.0, ncols); 60 | gtable = initarray_2Ddouble (ncols, 4, 0); 61 | 62 | for (i = 0; i < ncols; ++i) { 63 | col = i; 64 | cupt = xsnps[i]; 65 | 66 | /** 67 | if (i>=0) { 68 | printf("zz: %d %s\n", cupt -> ID) ; fflush(stdout) ; 69 | } 70 | */ 71 | getcolxz (cc, cupt, xindex, xtypes, nrows, i, xmean, xfancy, &n0, &n1); 72 | 73 | mean = xmean[col] / xfancy[col]; 74 | for (k = 0; k < 3; ++k) { 75 | y = ((double) k) - mean; 76 | y *= xfancy[col]; 77 | gtable[col][k] = y / sqrt (2.0); 78 | } 79 | gtable[col][3] = 0; 80 | 81 | t = MIN (n0, n1); 82 | if (t == 0) 83 | cupt->ignore = YES; // side-effect 84 | } 85 | 86 | free (cc); 87 | } 88 | 89 | void 90 | unsetgval () 91 | { 92 | if (xxsnps == NULL) 93 | return; 94 | 95 | xxsnps = NULL; 96 | xindivmarkers = NULL; 97 | xxindex = NULL; 98 | 99 | free2D (>able, xncols); 100 | 101 | gtable = NULL; 102 | 103 | free (xmean); 104 | free (xfancy); 105 | } 106 | 107 | int 108 | getgval (int row, int col, double *val) 109 | { 110 | 111 | /** 112 | if (row>=xnrows) fatalx("row index overflow\n") ; 113 | if (col>=xncols) fatalx("col index overflow\n") ; 114 | */ 115 | 116 | return getggval (xxindex[row], col, val); 117 | 118 | } 119 | 120 | int 121 | getggval (int indindx, int col, double *val) 122 | // indindex is index in full array 123 | { 124 | SNP *cupt; 125 | int t, z; 126 | double y, mean; 127 | 128 | if (gtable == NULL) fatalx("(gval) bug\n") ; 129 | *val = 0; 130 | if (xindivmarkers[indindx]->ignore) 131 | return -1; 132 | cupt = xxsnps[col]; 133 | t = getgtypes (cupt, indindx); 134 | if (t < 0) 135 | return t; 136 | 137 | *val = gtable[col][t]; 138 | return t; 139 | 140 | } 141 | 142 | // Unpack lookup table 143 | 144 | // macro to unpack a single byte 145 | #define U(n) { ((n) >> 6) & 3, ((n) >> 4) & 3, ((n) >> 2) & 3, (n) & 3 } 146 | 147 | // macros to build the u(n)packi(n)g table 148 | #define U1(n) U(n), U((n) + 1), U((n) + 2), U((n) + 3) 149 | #define U2(n) U1(n), U1((n) + 4), U1((n) + 8), U1((n) + 12) 150 | #define U3(n) U2(n), U2((n) + 16), U2((n) + 32), U2((n) + 48) 151 | 152 | // the unpacking table 153 | static const uint8_t UL[256][4] = { U3 (0), U3 (64), U3 (128), U3 (192) }; 154 | 155 | size_t 156 | get_nrows () 157 | { 158 | return (xnrows); 159 | } 160 | 161 | size_t 162 | get_ncols () 163 | { 164 | return (xncols); 165 | } 166 | 167 | /** 168 | * Unpacks a SNP column 169 | * @param snp_index 170 | * @param *y arrayref to store data 171 | */ 172 | void 173 | kjg_geno_get_normalized_row (const size_t snp_index, double *y) 174 | { 175 | uint8_t *packed = xxsnps[snp_index]->pbuff; 176 | double *norm_lookup = gtable[snp_index]; 177 | 178 | size_t i = 0, j = xxindex[i]; 179 | while (1) { 180 | size_t k = j / 4; // packed location 181 | size_t jf = (k + 1) * 4; // last index in packed location 182 | 183 | uint8_t p = packed[k]; // packed data 184 | const uint8_t *u = UL[p]; // unpacked data 185 | 186 | while (j < jf) { 187 | size_t o = j % 4; // offset in packed data 188 | size_t t = u[o]; // unpacked data 189 | y[i] = norm_lookup[t]; // normalized data 190 | 191 | if (++i == xnrows) // move onto next entry 192 | return; // break if we are done with SNP 193 | j = xxindex[i]; // perform the lookup 194 | } 195 | } 196 | } 197 | 198 | /** 199 | * Unpacks several SNP coluns 200 | * @param snp_index index of the SNP 201 | * @param *unpacked arrayref to store data 202 | */ 203 | 204 | size_t 205 | kjg_geno_get_normalized_rows (const size_t i, const size_t r, double *Y) 206 | { 207 | size_t j; 208 | for (j = i; j < i + r && j < xncols; j++) { 209 | kjg_geno_get_normalized_row (j, Y); 210 | Y += xnrows; 211 | } 212 | return (j - i); 213 | } 214 | -------------------------------------------------------------------------------- /include/statsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "strsubs.h" 7 | 8 | #define CHI_EPSILON 0.000001 /* accuracy of critchi approximation */ 9 | #define CHI_MAX 99999.0 /* maximum chi square value */ 10 | 11 | #define LOG_SQRT_PI 0.5723649429247000870717135 /* log (sqrt (pi)) */ 12 | #define I_SQRT_PI 0.5641895835477562869480795 /* 1 / sqrt (pi) */ 13 | #define I_PI 0.3183098861837906715377675 /* 1 / pi */ 14 | #define PI 3.1415926535897932384626434 15 | #define BIGX 20.0 /* max value to represent exp (x) */ 16 | #define ex(x) (((x) < -BIGX) ? 0.0 : exp (x)) 17 | #define SQRT_PI (1.0/I_SQRT_PI) /* sqrt (pi) */ 18 | 19 | double medchi(int *cls, int len, int *n0, int *n1, double *kstail) ; 20 | double ks2(int *cls, int len, int *n0, int *n1, double *kstail) ; 21 | double probks(double lam) ; 22 | 23 | double nordis(double z) ; 24 | double ndens(double val, double mean, double sig) ; 25 | double lognordis(double z) ; 26 | double logndens(double val, double mean, double sig) ; 27 | 28 | double ntail(double z) ; 29 | void tailstats(double *x, double a, int isupper) ; 30 | double zprob(double p) ; 31 | void setzptable() ; 32 | double z2x2(double *a) ; 33 | double conchi(double *a, int m, int n) ; 34 | double conchiv(double *a, int m, int n) ; 35 | double chitest(double *a, double *p, int n) ; 36 | double pi() ; 37 | 38 | double xlgamma(double x) ; 39 | double psi(double x) ; 40 | double tau(double x) ; 41 | double logbessi0(double x) ; 42 | double bessi0(double x) ; 43 | double logbessi1(double x) ; 44 | double bessi1(double x) ; 45 | void bernload() ; 46 | double bernum(int x) ; 47 | 48 | void gpars(double *p, double *lam, double mean, double var) ; 49 | void mlegamma(double *a, int n, double *p, double *lam) ; 50 | void mleg(double a1, double a2, double *p, double *lam) ; 51 | 52 | double dilog(double x) ; 53 | double li2(double x) ; 54 | 55 | double hwstat(double *x) ; 56 | 57 | double gammprob(double x, double p, double lam) ; 58 | double bprob(double p, double a, double b) ; 59 | double lbeta(double a, double b) ; 60 | double poissloglike(int kk, double mean) ; 61 | double poissexp(int kk, double mean) ; 62 | double dirmult(double *pp, int *aa, int len) ; 63 | double dawson(double t) ; 64 | 65 | double binomtail(int n, int t, double p, char c) ; 66 | double binlogtail(int n, int t, double p, char c) ; 67 | void genbin(double *a, int n, double p) ; 68 | void genlogbin(double *a, int n, double p) ; 69 | int ifirstgt(int val, int *tab, int n) ; 70 | int firstgt(double val, double *tab, int n) ; 71 | 72 | void cinterp(double val, double x0, double x1, 73 | double f0, double f0p, double f1, double f1p, double *fv, double *fvp) ; 74 | int firstgtx(double val, double *tab, int n) ; 75 | int jfirstgtx(int val, int *tab, int n) ; 76 | 77 | double rtlchsq(int df, double z) ; 78 | double critchi(int df, double z) ; 79 | double rtlf(int df1, int df2, double f) ; 80 | 81 | double ltlg(double a, double x) ; 82 | double rtlg(double a, double x) ; 83 | 84 | double twdens(double twstat) ; 85 | double twtail(double twstat) ; 86 | double twtailx(double twstat) ; 87 | double twdensx(double twstat) ; 88 | double twnorm(double lam, double p, double n) ; 89 | void twfree() ; 90 | int settwxtable(char *table) ; 91 | void gettw(double x, double *tailp, double *densp) ; 92 | double dotwcalc(double *lambda, int m, double *ptw, double *pzn, double *pzvar, int minm) ; 93 | int numgtz(double *a, int n) ; 94 | 95 | double betaix(double a, double b, double lo, double hi) ; 96 | double betai(double a, double b, double x) ; 97 | void bpars(double *a, double *b, double mean, double var) ; 98 | void bmoments(double a, double b, double *mean, double *var) ; 99 | double unbiasedest(int *ndx, int ndsize, int **counts) ; 100 | void weightjack(double *est, double *sig, double mean, double *jmean, double *jwt, int g) ; 101 | int modehprob(int n, int a, int m) ; 102 | void calcfc(double *c, int n, double rho) ; 103 | void circconv(double *xout, double *xa, double *xb, int n) ; 104 | 105 | double bino(int a, int b) ; 106 | void setbino(int maxbco) ; 107 | void destroy_bino() ; 108 | double exx(double x) ; 109 | double ubias(int a, int n, int k) ; 110 | double scx(double *W, double *mean, double *x, int d) ; // maybe should be in vsubs 111 | void dither(double *xout, double *xin, int n) ; 112 | void probit(double *xout, double *xin, int n) ; 113 | // Berk-Jones 114 | double bjugauss(double *p, double *u, double *a, int n) ; // return M_n (Berk-Jones) 115 | void bjasympt(double *ptail, double *mtail, double *tail, double mplus, double mminus, int n) ; 116 | void bj2(double *aa, double *bb, int a, int b, double *plpv, double *prpv, double *ppv) ; 117 | double genhp(double **hp, int a, int b) ; 118 | double genhpt(int a, int b, int *lt, int *rt) ; 119 | void gentail(double **ltail, double **rtail, double **hp, int a, int b) ; 120 | void setthresh(int *thresh, double **tail, int a, int b, double stat, int mode) ; 121 | void bj2x(int *type, int a, int b, double *plpv, double *prpv, double *ppv) ; 122 | 123 | void mlebeta(double *a, int n, double *p1, double *p2); 124 | void estbpars(double *a, int n, double *p1, double *p2); 125 | void mleb(double *p1, double *p2, double u, double v) ; 126 | 127 | int loadmptable(double ***mptable) ; 128 | double wynn(double *v, int n, double *acc, int *nacc) ; 129 | double *vwynn(double **vv, int n, int dim, double **acc, int *nacc) ; 130 | 131 | double rad2deg(double rad) ; 132 | double deg2rad(double deg) ; 133 | 134 | double quartile(double *x, int n, double q) ; 135 | int qinterp(double *a, double *b, int n, double val, double *ans) ; 136 | double truncexpmean(double m, double thresh, int isupper) ; 137 | 138 | void jitter(double *xout, double *xin, int n) ; 139 | void mannwhit(double *a, int na, double *b, int nb, double *pu, double *pv, double *ppv) ; 140 | 141 | -------------------------------------------------------------------------------- /src/twsubs.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include "admutils.h" 7 | #include "eigsubs.h" 8 | 9 | /* ********************************************************************* */ 10 | 11 | extern int verbose; 12 | 13 | int twl2mode = YES; 14 | int mval = -1; 15 | int nval = -1; 16 | int numsamp = 100; 17 | double mul1 = 1.0; 18 | 19 | 20 | double xxlike (int m, double a, double var, double logsum, double lsum); 21 | double xxlikex (int m, double a, double logsum, double lsum); 22 | double xxliked (int m, double a, double logsum, double lsum); 23 | double xxliked2 (int m, double a, double logsum, double lsum); 24 | double oldtwestxx (double *lam, int m, double *pzn, double *pzvar); 25 | double doeig2 (double *vals, int m, double *pzn, double *ptw); 26 | 27 | double 28 | twestxx (double *lam, int m, double *pzn, double *pzvar) 29 | { 30 | double tw, y; 31 | 32 | if (twl2mode == NO) 33 | return oldtwestxx (lam, m, pzn, pzvar); 34 | (void) doeig2 (lam, m, pzn, &tw); 35 | 36 | y = (*pzn) * (double) m; 37 | *pzvar = asum (lam, m) / y; 38 | return tw; 39 | 40 | } 41 | 42 | double 43 | oldtwestxx (double *lam, int m, double *pzn, double *pzvar) 44 | { 45 | double lsum, logsum; 46 | double *ww; 47 | double a, p, yn, var; 48 | double ylike, ybase, y, ylmax, ynmax, yld, yld2, ainc, ym; 49 | int k; 50 | 51 | 52 | ZALLOC (ww, m, double); 53 | copyarr (lam, ww, m); 54 | lsum = asum (ww, m); 55 | vlog (ww, ww, m); 56 | logsum = asum (ww, m); 57 | 58 | ylmax = -1.0e20; 59 | yn = (double) m; 60 | ybase = xxlikex (m, yn, logsum, lsum); 61 | 62 | for (k = 1; k <= 100; ++k) { 63 | a = yn / 2.0; 64 | ylike = xxlikex (m, a, logsum, lsum); 65 | yld = xxliked (m, a, logsum, lsum); 66 | ylike -= ybase; 67 | if (verbose) 68 | printf ("ynloop %12.3f %12.3f %12.3f\n", yn / (double) m, ylike, yld); 69 | if (ylike < ylmax) 70 | break; 71 | ylmax = ylike; 72 | ynmax = yn; 73 | yn *= 1.1; 74 | } 75 | a = ynmax / 2.0; 76 | for (k = 1; k <= 10; ++k) { 77 | // newton iteration 78 | ylike = xxlikex (m, a, logsum, lsum); 79 | yld = xxliked (m, a, logsum, lsum); 80 | yld2 = xxliked2 (m, a, logsum, lsum); 81 | ylike -= ybase; 82 | ainc = -yld / yld2; 83 | a += ainc; 84 | if (verbose) 85 | printf ("newton: %3d %15.9f %15.9f %15.9f\n", k, ylike, yld, ainc); 86 | } 87 | fflush (stdout); 88 | yn = 2.0 * a; 89 | ym = (double) m; 90 | var = lsum / (2.0 * a * ym); 91 | 92 | *pzn = yn; 93 | *pzvar = var; 94 | 95 | free (ww); 96 | return 0; 97 | } 98 | 99 | double 100 | xxlike (int m, double a, double var, double logsum, double lsum) 101 | { 102 | double p, yl = 0.0; 103 | double ym, x; 104 | int j; 105 | 106 | p = 0.5 * (double) (m + 1); 107 | ym = (double) m; 108 | 109 | yl = -ym * a * log (2.0); 110 | for (j = 1; j <= m; ++j) { 111 | x = a - 0.5 * (double) (m - j); 112 | yl -= lgamma (x); 113 | } 114 | // so far this is log (C_L) normalizing constant 115 | yl -= ym * a * log (var); 116 | yl += (a - p) * logsum; 117 | yl -= lsum / (2.0 * var); 118 | 119 | return yl; 120 | 121 | } 122 | 123 | double 124 | xxlikex (int m, double a, double logsum, double lsum) 125 | { 126 | double p, yl = 0.0; 127 | double ym, x, var, lco; 128 | int j; 129 | 130 | p = 0.5 * (double) (m + 1); 131 | ym = (double) m; 132 | lco = lsum / (2.0 * ym); 133 | var = lco / a; 134 | 135 | 136 | yl = -ym * a * log (2.0); 137 | for (j = 1; j <= m; ++j) { 138 | x = a - 0.5 * (double) (m - j); 139 | yl -= lgamma (x); 140 | } 141 | // so far this is log (C_L) normalizing constant 142 | yl -= ym * a * log (var); 143 | yl += (a - p) * logsum; 144 | yl -= ym * a; // plugging in var 145 | 146 | return yl; 147 | 148 | } 149 | 150 | double 151 | xxliked (int m, double a, double logsum, double lsum) 152 | // first deriv wrt a 153 | { 154 | double p, yl = 0.0; 155 | double ym, x, var, vard; 156 | int j; 157 | 158 | p = 0.5 * (double) (m + 1); 159 | ym = (double) m; 160 | var = lsum / (2.0 * a * ym); 161 | vard = -var / a; 162 | 163 | 164 | yl = -ym * log (2.0); 165 | for (j = 1; j <= m; ++j) { 166 | x = a - 0.5 * (double) (m - j); 167 | if (x < 0.0) 168 | return 100.0; 169 | yl -= psi (x); 170 | } 171 | // so far this is log (C_L) normalizing constant 172 | yl -= ym * log (var); 173 | yl -= (ym * a / var) * vard; 174 | yl += logsum; 175 | yl -= ym; // plugging in var 176 | 177 | return yl; 178 | 179 | } 180 | 181 | double 182 | xxliked2 (int m, double a, double logsum, double lsum) 183 | // second deriv wrt a 184 | { 185 | double p, yl = 0.0; 186 | double ym, x, var, vard, vard2, y; 187 | int j; 188 | 189 | p = 0.5 * (double) (m + 1); 190 | ym = (double) m; 191 | var = lsum / (2.0 * a * ym); 192 | vard = -var / a; 193 | vard2 = 2.0 * var / (a * a); 194 | 195 | 196 | yl = 0.0; 197 | for (j = 1; j <= m; ++j) { 198 | x = a - 0.5 * (double) (m - j); 199 | if (x < 0.0) 200 | return 100.0; 201 | yl -= tau (x); 202 | } 203 | // so far this is log (C_L) normalizing constant 204 | yl -= 2.0 * (ym / var) * vard; 205 | yl -= (ym * a / var) * vard2; 206 | y = vard / var; 207 | yl += (ym * a) * y * y; 208 | 209 | return yl; 210 | 211 | } 212 | 213 | double 214 | doeig2 (double *vals, int m, double *pzn, double *ptw) 215 | { 216 | static int ncall = 0; 217 | double y, tw, tail; 218 | double zn, zvar, top, bot; 219 | double *evals; 220 | 221 | ++ncall; 222 | ZALLOC (evals, m, double); 223 | copyarr (vals, evals, m); 224 | y = (double) m / asum (evals, m); 225 | vst (evals, evals, y, m); 226 | top = (double) (m * (m + 2)); 227 | bot = asum2 (evals, m) - (double) m; 228 | zn = top / bot; 229 | y = evals[0] * zn; 230 | tw = twnorm (y, (double) m, zn); 231 | tail = twtail (tw); 232 | free (evals); 233 | *pzn = zn; 234 | *ptw = tw; 235 | return tail; 236 | } 237 | 238 | double 239 | rhoinv (double x, double gam) 240 | // Lee et al. page 5 for \rho^{-1} 241 | { 242 | double y1, y2; 243 | 244 | y1 = x + 1.0 - gam; 245 | y2 = y1 * y1 - 4.0 * x; 246 | if (y2 <= 0.0) 247 | return -1.0; 248 | 249 | y1 += sqrt (y2); 250 | 251 | return 0.5 * y1; 252 | 253 | } 254 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ## shrinkmode added. 3/15 2 | EIGENSOFT version 8.0.0, 03/30/21 (for Linux only) 3 | 4 | The EIGENSOFT package implements methods from the following 2 papers: 5 | Patterson et al. 2006 PLoS Genet 2:e190 (population structure) 6 | Price et al. 2006 Nat Genet 38:904-9 (EIGENSTRAT stratification correction) 7 | 8 | NEW features of EIGENSOFT version 7.2.0 9 | -- shrinkmode 10 | 11 | NEW features of EIGENSOFT version 6.1.4 include: 12 | -- pcaselection was omitted from 6.1.3 by accident 13 | -- Statically linked GSL/openblas 14 | -- Fixed memory allocation bug in pcaselection 15 | -- Some routines moved into nicklib 16 | -- Error message on allocate failure now prints length as "%ld" 17 | supporting long values. 18 | 19 | NEW features of EIGENSOFT version 6.1.3 include: 20 | -- Restored script file extensions to .perl instead of .pl 21 | -- Added updated ploteig script that disappeared from the repository 22 | 23 | NEW features of EIGENSOFT version 6.1.2 include: 24 | -- Updated license info to be GPL compliant required by linking the GSL 25 | 26 | NEW features of EIGENSOFT version 6.1.1 include: 27 | -- Minor bug fix to correctly merge version 6.0.2 and version 6.1 changes. 28 | -- pcaselection operates on evec files. Added examples. 29 | -- Backported twtable.c/h from EIGENSOFT 7alpha 30 | 31 | NEW features of EIGENSOFT version 6.1 include: 32 | -- The range finding step of PCA fastmode only scales the multiplied matrix, 33 | as orthogonalization is unnecessary. This appears to improve accuracy. 34 | 35 | NEW features of EIGENSOFT version 6.0.2 include: 36 | -- Fixed Makefile and documentation to build eigenstrat properly 37 | -- Moved Tracy-Widom table into a header file for easier building 38 | 39 | NEW features of EIGENSOFT version 6.0.1 include: 40 | -- Minor bug fix which prevents smartpca from trying to print out eigenvalues 41 | if fastmode is set. 42 | 43 | NEW features of EIGENSOFT version 6.0.0beta included: 44 | -- New option fastmode which implements a very fast pca approximation. 45 | See POPGEN/README and Galinsky 2014 ASHG talk. 46 | -- Changes to external packages required. EIGENSOFT version 5.0.2 required 47 | lapack + blas. On the other hand, EIGENSOFT version 6.0beta requires 48 | GSL + lapack + OpenBLAS (but does not require the native version of blas). 49 | The Makefile has been changed accordingly. 50 | -- EIGENSOFT version 6.0beta supports multi-threading. See POPGEN/README. 51 | -- Bug fix for ldregress option. 52 | 53 | See CONVERTF/README for documentation of programs for converting file formats. 54 | See POPGEN/README for documentation of population structure programs. 55 | See EIGENSTRAT/README for documentation of EIGENSTRAT programs. 56 | 57 | Questions? 58 | See https://www.hsph.harvard.edu/alkes-price/eigensoft-frequently-asked-questions/ 59 | https://github.com/DReichLab/EIG 60 | 61 | For questions about building this software: 62 | Matthew Mah 63 | 64 | For questions about smartpca: 65 | Nick Patterson 66 | 67 | For questions about eigenstrat: 68 | Alkes Price 69 | 70 | Executables and source code: 71 | ---------------------------- 72 | All C executables are in the bin/ directory. 73 | 74 | We have placed source code for all C executables in the src/ directory, 75 | for users who wish to modify and recompile our programs. 76 | "cd src" 77 | "make" 78 | "make install" 79 | 80 | Note that some of our software will only compile if your system has the 81 | GSL + lapack + OpenBLAS packages installed. 82 | 83 | On Mac OSX, you can install gsl and OpenBLAS with lapack using homebrew: 84 | "brew install gsl" 85 | "brew install openblas" 86 | 87 | If these packages are not in standard directories, you can specify the 88 | appropriate include and library directories with the CFLAGS and LDFLAGS 89 | make variables. 90 | For example, on the Harvard Medical School O2 cluster, the command is: 91 | 'make CFLAGS="-I/n/app/openblas/0.2.19/include -I/n/app/gsl/2.3/include" LDFLAGS="-L/n/app/openblas/0.2.19/lib -L/n/app/gsl/2.3/lib/"' 92 | On Mac OSX: 93 | 'make CFLAGS="-I/usr/local/opt/openblas/include -I/usr/local/opt/gsl/include" LDFLAGS="-L/usr/local/opt/openblas/lib -L/usr/local/opt/gsl/lib"' 94 | 95 | If you have issues with missing lapacke symbols, for example "undefined reference to `LAPACKE_dlange'", run make with the corresponding additional libraries linked: 96 | 'make LDLIBS="-llapacke"' 97 | This has been encountered on Linux Mint 18. 98 | 99 | If you have trouble compiling and running our code, try compiling and 100 | running the pcatoy program in the src directory: 101 | "cd src" 102 | "make pcatoy" 103 | "./pcatoy" 104 | If you are unable to run the pcatoy program successfully, please contact 105 | your system administrator for help, as this is a systems issue which is 106 | beyond our scope. Your system administrator will be able to troubleshoot 107 | your systems issue using this trivial program. [You can also try running 108 | the pcatoy program in the bin directory, which we have already compiled.] 109 | 110 | To remake the entire package: 111 | "cd src" 112 | "make clobber" 113 | "make install" 114 | 115 | To remake EIG7.2 it is necessary to link to the OpenBLAS library. On orchestra, 116 | the path is /opt/openblas and should work automatically. On Broad institute machines, 117 | the user should execute "use .openblas-0.2.8" and "use GCC-4.9" at the command 118 | prompt before attempting to remake. All other users should install OpenBLAS and 119 | set the variable OPENBLAS to the path at the make command line, 120 | e.g. "make install OPENBLAS=/usr/local/openblas" 121 | 122 | ---------------------------- 123 | Acknowledgements: 124 | EIGENSOFT was written by Nick Patterson, Alkes Price, Samuela Pollack, 125 | Kevin Galinsky, Chris Chang, and Sasha Gusev. 126 | 127 | We thank John Novembre and Mike Boursnell for code improvements, Matt Hanna 128 | for the first implementation of multi-threading, and Angela Yu for a bugfix. 129 | 130 | ---------------------------- 131 | SOFTWARE COPYRIGHT NOTICE AGREEMENT 132 | This software and its documentation are copyright (2010) by Harvard University 133 | and The Broad Institute. All rights are reserved. This software is supplied 134 | without any warranty or guaranteed support whatsoever. Neither Harvard 135 | University nor The Broad Institute can be responsible for its use, misuse, or 136 | functionality. The software may be freely copied for non-commercial purposes, 137 | provided this copyright notice is retained. 138 | 139 | -------------------------------------------------------------------------------- /src/eigensrc/eigsubs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | 9 | /* ********************************************************************* */ 10 | void eigx_ (double *pmat, double *ev, int *n); 11 | void eigxv_ (double *pmat, double *eval, double *evec, int *n); 12 | void cdc_ (double *pmat, int *n); 13 | void inverse_ (double *pmat, int *n); 14 | void solve_ (double *pmat, double *v, int *n); 15 | void geneigsolve_ (double *pmat, double *qmat, double *eval, int *n); 16 | 17 | void packsym (double *pmat, double *mat, int n); 18 | 19 | 20 | void 21 | eigvals (double *mat, double *evals, int n) 22 | { 23 | double *pmat; 24 | int len; 25 | 26 | len = n * (n + 1); 27 | len /= 2; 28 | ZALLOC (pmat, 2*len, double); 29 | 30 | vst (mat, mat, -1.0, n * n); 31 | packsym (pmat, mat, n); 32 | eigx_ (pmat, evals, &n); 33 | free (pmat); 34 | vst (mat, mat, -1.0, n * n); 35 | vst (evals, evals, -1.0, n); 36 | } 37 | 38 | void 39 | eigvecs (double *mat, double *evals, double *evecs, int n) 40 | { 41 | double *pmat; 42 | int len; 43 | 44 | len = n * (n + 1); 45 | len /= 2; 46 | ZALLOC (pmat, 2*len, double); 47 | 48 | vst (mat, mat, -1.0, n * n); 49 | packsym (pmat, mat, n); 50 | 51 | eigxv_ (pmat, evals, evecs, &n); 52 | free (pmat); 53 | vst (mat, mat, -1.0, n * n); 54 | vst (evals, evals, -1.0, n); 55 | } 56 | 57 | /* note: dpotrf requires the entire matrix, not packed lower-tri */ 58 | void 59 | chdecomp (double *mat, int n) 60 | { 61 | /* symetric matrix - don't need to 62 | * convert to column major order */ 63 | 64 | cdc_ (mat, &n); 65 | } 66 | 67 | void 68 | inverse (double *mat, int n) 69 | { 70 | int i, j; 71 | 72 | /* convert to column-major order */ 73 | for (i = 0; i < n; i++) { 74 | for (j = 0; j < i; j++) { 75 | double t = mat[n * i + j]; 76 | mat[n * i + j] = mat[n * j + i]; 77 | mat[n * j + i] = t; 78 | } 79 | } 80 | 81 | /*** DEBUGGING: ***/ 82 | { 83 | FILE *fid = fopen ("eigsubs.dbg", "a"); 84 | fprintf (fid, "matrix U\n"); 85 | for (i = 0; i < n; i++) { 86 | for (j = 0; j < n; j++) { 87 | fprintf (fid, "%8.4f ", mat[i * n + j]); 88 | } 89 | fprintf (fid, "\n"); 90 | } 91 | } 92 | /*******************/ 93 | 94 | inverse_ (mat, &n); 95 | 96 | /*** DEBUGGING: ***/ 97 | { 98 | FILE *fid = fopen ("eigsubs.dbg", "a"); 99 | fprintf (fid, "inverse of matrix U\n"); 100 | for (i = 0; i < n; i++) { 101 | for (j = 0; j < n; j++) { 102 | fprintf (fid, "%8.4f ", mat[i * n + j]); 103 | } 104 | fprintf (fid, "\n"); 105 | } 106 | } 107 | /*******************/ 108 | 109 | for (i = 0; i < n; i++) { 110 | for (j = 0; j < i; j++) { 111 | double t = mat[n * i + j]; 112 | mat[n * i + j] = mat[n * j + i]; 113 | mat[n * j + i] = t; 114 | } 115 | } 116 | } 117 | 118 | void 119 | solve (double *mat, double *b, double *v, int n) 120 | { 121 | int i, j; 122 | 123 | double *mat2 = (double *) malloc (n * n * sizeof (double)); 124 | 125 | /* lapack is going to put the lu-decomp into the matrix, 126 | * so make a copy and convert to column-major order */ 127 | for (i = 0; i < n; i++) { 128 | for (j = 0; j < n; j++) { 129 | mat2[n * i + j] = mat[n * j + i]; 130 | } 131 | } 132 | 133 | /* copy b into v */ 134 | for (i = 0; i < n; i++) { 135 | v[i] = b[i]; 136 | } 137 | 138 | solve_ (mat2, v, &n); 139 | 140 | free (mat2); 141 | return; 142 | } 143 | 144 | void 145 | packsym (double *pmat, double *mat, int n) 146 | // lapack L mode (fortran) 147 | { 148 | int i, j, k = 0; 149 | for (i = 0; i < n; i++) { 150 | for (j = i; j < n; j++) { 151 | pmat[k] = mat[i * n + j]; 152 | ++k; 153 | } 154 | } 155 | } 156 | 157 | void 158 | geneigsolve (double *pmat, double *qmat, double *evec, double *eval, int n) 159 | { 160 | 161 | /* save copy of A and B, which LAPACK will overwrite */ 162 | double *amat = (double *) malloc (n * n * sizeof (double)); 163 | double *bmat = (double *) malloc (n * n * sizeof (double)); 164 | 165 | int i, j; 166 | for (i = 0; i < n * n; i++) { 167 | amat[i] = pmat[i]; 168 | bmat[i] = qmat[i]; 169 | } 170 | 171 | 172 | 173 | 174 | { 175 | FILE *fid = fopen ("eigsubs.dbg", "a"); 176 | fprintf (fid, "matrix A\n"); 177 | for (i = 0; i < n; i++) { 178 | for (j = 0; j < n; j++) { 179 | fprintf (fid, "%8.4f ", amat[i * n + j]); 180 | } 181 | fprintf (fid, "\n"); 182 | } 183 | 184 | fprintf (fid, "matrix B\n"); 185 | for (i = 0; i < n; i++) { 186 | for (j = 0; j < n; j++) { 187 | fprintf (fid, "%8.4f ", bmat[i * n + j]); 188 | } 189 | fprintf (fid, "\n"); 190 | } 191 | fclose (fid); 192 | } 193 | 194 | 195 | /* matrices have to be symetric-definite, so don't 196 | * need to convert to column-major order */ 197 | geneigsolve_ (pmat, qmat, eval, &n); 198 | 199 | 200 | /* copy eigenvectors to A and original A,B back */ 201 | /* ith eigenvector should be in row i */ 202 | for (i = 0; i < n; i++) { 203 | for (j = 0; j < n; j++) { 204 | evec[i * n + j] = pmat[i * n + j]; /* don't put back in row-major order (?) */ 205 | } 206 | } 207 | for (i = 0; i < n * n; i++) { 208 | pmat[i] = amat[i]; 209 | qmat[i] = bmat[i]; 210 | } 211 | 212 | /* LAPACK puts evals and evecs in ascending order */ 213 | /* reorder evals and evecs so evals are in descending order */ 214 | 215 | for (i = 0; i < n / 2; i++) { 216 | double t = eval[i]; 217 | eval[i] = eval[n - 1 - i]; 218 | eval[n - 1 - i] = t; 219 | 220 | for (j = 0; j < n; j++) { /* exchange row i and row(n-1-i) */ 221 | t = evec[i * n + j]; 222 | evec[i * n + j] = evec[(n - 1 - i) * n + j]; 223 | evec[(n - 1 - i) * n + j] = t; 224 | } 225 | } 226 | 227 | free (amat); 228 | free (bmat); 229 | 230 | } 231 | 232 | void 233 | mkorth (double *orth, double *ww, int n) 234 | // special purpose. Construct basis of vectors orthogonal to ww 235 | { 236 | 237 | double *vv, *evec, *qq; 238 | double y; 239 | 240 | ZALLOC (vv, n * n, double); 241 | ZALLOC (evec, n * n, double); 242 | ZALLOC (qq, n, double); 243 | 244 | y = asum2 (ww, n); 245 | vst (qq, ww, 1.0 / sqrt (y), n); 246 | setidmat (vv, n); 247 | addouter (vv, qq, n); 248 | 249 | eigvecs (vv, orth, evec, n); 250 | copyarr (evec + n, orth, n * (n - 1)); 251 | 252 | free (vv); 253 | free (qq); 254 | free (evec); 255 | 256 | } 257 | -------------------------------------------------------------------------------- /POPGEN/twtable: -------------------------------------------------------------------------------- 1 | ### this is a table of TW using a Runge-Kutta solver suggested by Per-Olaf Persson 2 | ### algorithm coded in C by NP using NAG ODE solver 3 | -8.000 1.000000000 0.000000000 4 | -7.900 1.000000000 0.000000000 5 | -7.800 1.000000000 0.000000000 6 | -7.700 1.000000000 0.000000000 7 | -7.600 1.000000000 0.000000000 8 | -7.500 1.000000000 0.000000001 9 | -7.400 1.000000000 0.000000002 10 | -7.300 0.999999999 0.000000005 11 | -7.200 0.999999999 0.000000010 12 | -7.100 0.999999997 0.000000019 13 | -7.000 0.999999995 0.000000039 14 | -6.900 0.999999989 0.000000076 15 | -6.800 0.999999978 0.000000146 16 | -6.700 0.999999958 0.000000276 17 | -6.600 0.999999920 0.000000511 18 | -6.500 0.999999849 0.000000932 19 | -6.400 0.999999723 0.000001670 20 | -6.300 0.999999498 0.000002942 21 | -6.200 0.999999105 0.000005097 22 | -6.100 0.999998431 0.000008683 23 | -6.000 0.999997293 0.000014554 24 | -5.900 0.999995401 0.000024005 25 | -5.800 0.999992309 0.000038969 26 | -5.700 0.999987331 0.000062279 27 | -5.600 0.999979441 0.000098012 28 | -5.500 0.999967125 0.000151923 29 | -5.400 0.999948187 0.000231995 30 | -5.300 0.999919496 0.000349097 31 | -5.200 0.999876655 0.000517756 32 | -5.100 0.999813597 0.000757035 33 | -5.000 0.999722082 0.001091485 34 | -4.900 0.999591101 0.001552137 35 | -4.800 0.999406175 0.002177466 36 | -4.700 0.999148569 0.003014256 37 | -4.600 0.998794427 0.004118267 38 | -4.500 0.998313849 0.005554591 39 | -4.400 0.997669962 0.007397591 40 | -4.300 0.996818016 0.009730295 41 | -4.200 0.995704571 0.012643159 42 | -4.100 0.994266851 0.016232112 43 | -4.000 0.992432322 0.020595851 44 | -3.900 0.990118582 0.025832397 45 | -3.800 0.987233631 0.032034971 46 | -3.700 0.983676579 0.039287325 47 | -3.600 0.979338843 0.047658716 48 | -3.500 0.974105853 0.057198759 49 | -3.400 0.967859270 0.067932445 50 | -3.300 0.960479677 0.079855636 51 | -3.200 0.951849687 0.092931337 52 | -3.100 0.941857369 0.107087044 53 | -3.000 0.930399881 0.122213418 54 | -2.900 0.917387157 0.138164458 55 | -2.800 0.902745495 0.154759279 56 | -2.700 0.886420892 0.171785501 57 | -2.600 0.868381957 0.189004169 58 | -2.500 0.848622271 0.206156009 59 | -2.400 0.827162053 0.222968755 60 | -2.300 0.804049066 0.239165233 61 | -2.200 0.779358684 0.254471803 62 | -2.100 0.753193114 0.268626779 63 | -2.000 0.725679802 0.281388431 64 | -1.900 0.696969061 0.292542221 65 | -1.800 0.667231036 0.301906945 66 | -1.700 0.636652122 0.309339558 67 | -1.600 0.605430961 0.314738516 68 | -1.500 0.573774198 0.318045543 69 | -1.400 0.541892124 0.319245849 70 | -1.300 0.509994383 0.318366852 71 | -1.200 0.478285870 0.315475570 72 | -1.100 0.446962951 0.310674866 73 | -1.000 0.416210105 0.304098784 74 | -0.900 0.386197065 0.295907232 75 | -0.800 0.357076521 0.286280263 76 | -0.700 0.328982392 0.275412215 77 | -0.600 0.302028689 0.263505933 78 | -0.500 0.276308949 0.250767272 79 | -0.400 0.251896179 0.237400053 80 | -0.300 0.228843301 0.223601597 81 | -0.200 0.207183986 0.209558915 82 | -0.100 0.186933854 0.195445624 83 | 0.000 0.168091934 0.181419571 84 | 0.100 0.150642330 0.167621190 85 | 0.200 0.134556018 0.154172511 86 | 0.300 0.119792709 0.141176787 87 | 0.400 0.106302721 0.128718659 88 | 0.500 0.094028817 0.116864772 89 | 0.600 0.082907953 0.105664756 90 | 0.700 0.072872924 0.095152500 91 | 0.800 0.063853860 0.085347620 92 | 0.900 0.055779577 0.076257058 93 | 1.000 0.048578763 0.067876743 94 | 1.100 0.042180992 0.060193257 95 | 1.200 0.036517582 0.053185457 96 | 1.300 0.031522284 0.046826015 97 | 1.400 0.027131832 0.041082856 98 | 1.500 0.023286351 0.035920459 99 | 1.600 0.019929640 0.031301023 100 | 1.700 0.017009350 0.027185487 101 | 1.800 0.014477062 0.023534398 102 | 1.900 0.012288293 0.020308645 103 | 2.000 0.010402429 0.017470054 104 | 2.100 0.008782605 0.014981856 105 | 2.200 0.007395547 0.012809046 106 | 2.300 0.006211384 0.010918644 107 | 2.400 0.005203434 0.009279861 108 | 2.500 0.004347977 0.007864200 109 | 2.600 0.003624031 0.006645482 110 | 2.700 0.003013114 0.005599836 111 | 2.800 0.002499018 0.004705636 112 | 2.900 0.002067590 0.003943413 113 | 3.000 0.001706520 0.003295741 114 | 3.100 0.001405143 0.002747112 115 | 3.200 0.001154255 0.002283795 116 | 3.300 0.000945945 0.001893694 117 | 3.400 0.000773431 0.001566204 118 | 3.500 0.000630927 0.001292071 119 | 3.600 0.000513508 0.001063253 120 | 3.700 0.000416999 0.000872795 121 | 3.800 0.000337871 0.000714702 122 | 3.900 0.000273152 0.000583831 123 | 4.000 0.000220344 0.000475784 124 | 4.100 0.000177359 0.000386816 125 | 4.200 0.000142452 0.000313749 126 | 4.300 0.000114170 0.000253894 127 | 4.400 0.000091308 0.000204987 128 | 4.500 0.000072871 0.000165125 129 | 4.600 0.000058035 0.000132716 130 | 4.700 0.000046124 0.000106431 131 | 4.800 0.000036582 0.000085163 132 | 4.900 0.000028955 0.000067996 133 | 5.000 0.000022872 0.000054172 134 | 5.100 0.000018030 0.000043066 135 | 5.200 0.000014185 0.000034164 136 | 5.300 0.000011138 0.000027045 137 | 5.400 0.000008728 0.000021365 138 | 5.500 0.000006826 0.000016843 139 | 5.600 0.000005328 0.000013250 140 | 5.700 0.000004151 0.000010403 141 | 5.800 0.000003228 0.000008151 142 | 5.900 0.000002505 0.000006374 143 | 6.000 0.000001941 0.000004974 144 | 6.100 0.000001501 0.000003874 145 | 6.200 0.000001158 0.000003011 146 | 6.300 0.000000892 0.000002336 147 | 6.400 0.000000686 0.000001809 148 | 6.500 0.000000527 0.000001398 149 | 6.600 0.000000403 0.000001078 150 | 6.700 0.000000308 0.000000830 151 | 6.800 0.000000235 0.000000638 152 | 6.900 0.000000179 0.000000489 153 | 7.000 0.000000136 0.000000375 154 | 7.100 0.000000104 0.000000286 155 | 7.200 0.000000079 0.000000218 156 | 7.300 0.000000059 0.000000166 157 | 7.400 0.000000045 0.000000126 158 | 7.500 0.000000034 0.000000096 159 | 7.600 0.000000025 0.000000073 160 | 7.700 0.000000019 0.000000055 161 | 7.800 0.000000014 0.000000041 162 | 7.900 0.000000011 0.000000031 163 | 8.000 0.000000008 0.000000023 164 | -------------------------------------------------------------------------------- /src/smarttables/twtable: -------------------------------------------------------------------------------- 1 | ### this is a table of TW using a Runge-Kutta solver suggested by Per-Olaf Persson 2 | ### algorithm coded in C by NP using NAG ODE solver 3 | ### arg r tail pdf 4 | -8.000 1.000000000 0.000000000 5 | -7.900 1.000000000 0.000000000 6 | -7.800 1.000000000 0.000000000 7 | -7.700 1.000000000 0.000000000 8 | -7.600 1.000000000 0.000000000 9 | -7.500 1.000000000 0.000000001 10 | -7.400 1.000000000 0.000000002 11 | -7.300 0.999999999 0.000000005 12 | -7.200 0.999999999 0.000000010 13 | -7.100 0.999999997 0.000000019 14 | -7.000 0.999999995 0.000000039 15 | -6.900 0.999999989 0.000000076 16 | -6.800 0.999999978 0.000000146 17 | -6.700 0.999999958 0.000000276 18 | -6.600 0.999999920 0.000000511 19 | -6.500 0.999999849 0.000000932 20 | -6.400 0.999999723 0.000001670 21 | -6.300 0.999999498 0.000002942 22 | -6.200 0.999999105 0.000005097 23 | -6.100 0.999998431 0.000008683 24 | -6.000 0.999997293 0.000014554 25 | -5.900 0.999995401 0.000024005 26 | -5.800 0.999992309 0.000038969 27 | -5.700 0.999987331 0.000062279 28 | -5.600 0.999979441 0.000098012 29 | -5.500 0.999967125 0.000151923 30 | -5.400 0.999948187 0.000231995 31 | -5.300 0.999919496 0.000349097 32 | -5.200 0.999876655 0.000517756 33 | -5.100 0.999813597 0.000757035 34 | -5.000 0.999722082 0.001091485 35 | -4.900 0.999591101 0.001552137 36 | -4.800 0.999406175 0.002177466 37 | -4.700 0.999148569 0.003014256 38 | -4.600 0.998794427 0.004118267 39 | -4.500 0.998313849 0.005554591 40 | -4.400 0.997669962 0.007397591 41 | -4.300 0.996818016 0.009730295 42 | -4.200 0.995704571 0.012643159 43 | -4.100 0.994266851 0.016232112 44 | -4.000 0.992432322 0.020595851 45 | -3.900 0.990118582 0.025832397 46 | -3.800 0.987233631 0.032034971 47 | -3.700 0.983676579 0.039287325 48 | -3.600 0.979338843 0.047658716 49 | -3.500 0.974105853 0.057198759 50 | -3.400 0.967859270 0.067932445 51 | -3.300 0.960479677 0.079855636 52 | -3.200 0.951849687 0.092931337 53 | -3.100 0.941857369 0.107087044 54 | -3.000 0.930399881 0.122213418 55 | -2.900 0.917387157 0.138164458 56 | -2.800 0.902745495 0.154759279 57 | -2.700 0.886420892 0.171785501 58 | -2.600 0.868381957 0.189004169 59 | -2.500 0.848622271 0.206156009 60 | -2.400 0.827162053 0.222968755 61 | -2.300 0.804049066 0.239165233 62 | -2.200 0.779358684 0.254471803 63 | -2.100 0.753193114 0.268626779 64 | -2.000 0.725679802 0.281388431 65 | -1.900 0.696969061 0.292542221 66 | -1.800 0.667231036 0.301906945 67 | -1.700 0.636652122 0.309339558 68 | -1.600 0.605430961 0.314738516 69 | -1.500 0.573774198 0.318045543 70 | -1.400 0.541892124 0.319245849 71 | -1.300 0.509994383 0.318366852 72 | -1.200 0.478285870 0.315475570 73 | -1.100 0.446962951 0.310674866 74 | -1.000 0.416210105 0.304098784 75 | -0.900 0.386197065 0.295907232 76 | -0.800 0.357076521 0.286280263 77 | -0.700 0.328982392 0.275412215 78 | -0.600 0.302028689 0.263505933 79 | -0.500 0.276308949 0.250767272 80 | -0.400 0.251896179 0.237400053 81 | -0.300 0.228843301 0.223601597 82 | -0.200 0.207183986 0.209558915 83 | -0.100 0.186933854 0.195445624 84 | 0.000 0.168091934 0.181419571 85 | 0.100 0.150642330 0.167621190 86 | 0.200 0.134556018 0.154172511 87 | 0.300 0.119792709 0.141176787 88 | 0.400 0.106302721 0.128718659 89 | 0.500 0.094028817 0.116864772 90 | 0.600 0.082907953 0.105664756 91 | 0.700 0.072872924 0.095152500 92 | 0.800 0.063853860 0.085347620 93 | 0.900 0.055779577 0.076257058 94 | 1.000 0.048578763 0.067876743 95 | 1.100 0.042180992 0.060193257 96 | 1.200 0.036517582 0.053185457 97 | 1.300 0.031522284 0.046826015 98 | 1.400 0.027131832 0.041082856 99 | 1.500 0.023286351 0.035920459 100 | 1.600 0.019929640 0.031301023 101 | 1.700 0.017009350 0.027185487 102 | 1.800 0.014477062 0.023534398 103 | 1.900 0.012288293 0.020308645 104 | 2.000 0.010402429 0.017470054 105 | 2.100 0.008782605 0.014981856 106 | 2.200 0.007395547 0.012809046 107 | 2.300 0.006211384 0.010918644 108 | 2.400 0.005203434 0.009279861 109 | 2.500 0.004347977 0.007864200 110 | 2.600 0.003624031 0.006645482 111 | 2.700 0.003013114 0.005599836 112 | 2.800 0.002499018 0.004705636 113 | 2.900 0.002067590 0.003943413 114 | 3.000 0.001706520 0.003295741 115 | 3.100 0.001405143 0.002747112 116 | 3.200 0.001154255 0.002283795 117 | 3.300 0.000945945 0.001893694 118 | 3.400 0.000773431 0.001566204 119 | 3.500 0.000630927 0.001292071 120 | 3.600 0.000513508 0.001063253 121 | 3.700 0.000416999 0.000872795 122 | 3.800 0.000337871 0.000714702 123 | 3.900 0.000273152 0.000583831 124 | 4.000 0.000220344 0.000475784 125 | 4.100 0.000177359 0.000386816 126 | 4.200 0.000142452 0.000313749 127 | 4.300 0.000114170 0.000253894 128 | 4.400 0.000091308 0.000204987 129 | 4.500 0.000072871 0.000165125 130 | 4.600 0.000058035 0.000132716 131 | 4.700 0.000046124 0.000106431 132 | 4.800 0.000036582 0.000085163 133 | 4.900 0.000028955 0.000067996 134 | 5.000 0.000022872 0.000054172 135 | 5.100 0.000018030 0.000043066 136 | 5.200 0.000014185 0.000034164 137 | 5.300 0.000011138 0.000027045 138 | 5.400 0.000008728 0.000021365 139 | 5.500 0.000006826 0.000016843 140 | 5.600 0.000005328 0.000013250 141 | 5.700 0.000004151 0.000010403 142 | 5.800 0.000003228 0.000008151 143 | 5.900 0.000002505 0.000006374 144 | 6.000 0.000001941 0.000004974 145 | 6.100 0.000001501 0.000003874 146 | 6.200 0.000001158 0.000003011 147 | 6.300 0.000000892 0.000002336 148 | 6.400 0.000000686 0.000001809 149 | 6.500 0.000000527 0.000001398 150 | 6.600 0.000000403 0.000001078 151 | 6.700 0.000000308 0.000000830 152 | 6.800 0.000000235 0.000000638 153 | 6.900 0.000000179 0.000000489 154 | 7.000 0.000000136 0.000000375 155 | 7.100 0.000000104 0.000000286 156 | 7.200 0.000000079 0.000000218 157 | 7.300 0.000000059 0.000000166 158 | 7.400 0.000000045 0.000000126 159 | 7.500 0.000000034 0.000000096 160 | 7.600 0.000000025 0.000000073 161 | 7.700 0.000000019 0.000000055 162 | 7.800 0.000000014 0.000000041 163 | 7.900 0.000000011 0.000000031 164 | 8.000 0.000000008 0.000000023 165 | -------------------------------------------------------------------------------- /bin/ploteig: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/perl -w 2 | 3 | ### ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-g gstem] [-o outfile] [-x] [-k] [-y] [-z sep] -r colorstring -m xmul -n ymul 4 | use Getopt::Std ; 5 | use File::Basename ; 6 | 7 | ## pops : separated -x = make postscript and pdf -z use another separator 8 | ## -k keep intermediate files 9 | ## NEW if pops is a file names are read one per line 10 | ## scaling on x, y axes 11 | 12 | getopts('i:o:p:c:s:d:z:t:r:m:n:g:xkyq',\%opts) ; 13 | $postscmode = $opts{"x"} ; 14 | $oldkeystyle = $opts{"y"} ; 15 | $nopoptitle = $opts{"q"} ; 16 | $kflag = $opts{"k"} ; 17 | $keepflag = 1 if ($kflag) ; 18 | $keepflag = 1 unless ($postscmode) ; 19 | 20 | $zsep = ":" ; 21 | if (defined $opts{"z"}) { 22 | $zsep = $opts{"z"} ; 23 | $zsep = "\+" if ($zsep eq "+") ; 24 | } 25 | 26 | if (defined $opts{"r"}) { 27 | $colorstr = $opts{"r"} ; 28 | setcolor($colorstr) ; 29 | } 30 | $xmul = $opts{"m"} ; 31 | $xmul = 1 unless (defined $xmul) ; 32 | 33 | $ymul = $opts{"n"} ; 34 | $ymul = 1 unless (defined $ymul) ; 35 | 36 | $title = "" ; 37 | if (defined $opts{"t"}) { 38 | $title = $opts{"t"} ; 39 | } 40 | if (defined $opts{"i"}) { 41 | $infile = $opts{"i"} ; 42 | } 43 | else { 44 | usage() ; 45 | exit 0 ; 46 | } 47 | 48 | open (FF, $infile) || die "can't open $infile\n" ; 49 | @L = () ; 50 | chomp @L ; 51 | $nf = 0 ; 52 | foreach $line (@L) { 53 | next if ($line =~ /\#/) ; 54 | @Z = split " ", $line ; 55 | $x = @Z ; 56 | $nf = $x if ($nf < $x) ; 57 | } 58 | printf "## number of fields: %d\n", $nf ; 59 | $popcol = $nf-1 ; 60 | 61 | 62 | if (defined $opts{"p"}) { 63 | $pops = $opts{"p"} ; 64 | } 65 | else { 66 | die "p parameter compulsory\n" ; 67 | } 68 | 69 | $popsname = setpops ($pops) ; 70 | print "$popsname\n" ; 71 | 72 | $c1 = 1; $c2 =2 ; 73 | if (defined $opts{"c"}) { 74 | $cols = $opts{"c"} ; 75 | ($c1, $c2) = split ":", $cols ; 76 | die "bad c param: $cols\n" unless (defined $cols) ; 77 | } 78 | 79 | $stem = "$infile.$c1:$c2" ; 80 | if (defined $opts{"s"}) { 81 | $stem = $opts{"s"} ; 82 | } 83 | 84 | if (defined $opts{"g"}) { 85 | $stem = $gstem = $opts{"g"} ; 86 | } 87 | 88 | $gstem = "$stem.$popsname" unless (defined $gstem) ; 89 | $gnfile = "$gstem.xtxt" ; 90 | 91 | if (defined $opts{"o"}) { 92 | $gnfile = $opts{"o"} ; 93 | } 94 | 95 | 96 | @T = () ; ## trash 97 | open (GG, ">$gnfile") || die "can't open $gnfile\n" ; 98 | print GG "## " unless ($postscmode) ; 99 | print GG "set terminal postscript color noenhanced\n" ; 100 | print GG "set title \"$title\" \n" ; 101 | print GG "set key outside\n" unless ($oldkeystyle) ; 102 | print GG "set xlabel \"eigenvector $c1\" \n" if ($xmul == 1) ; 103 | print GG "set xlabel \"eigenvector $c1 (* $xmul) \" \n" if ($xmul != 1) ; 104 | print GG "set ylabel \"eigenvector $c2\" \n" if ($ymul == 1) ; 105 | print GG "set ylabel \"eigenvector $c1 (* $ymul) \" \n" if ($ymul != 1) ; 106 | print GG "plot " ; 107 | $np = @P ; 108 | $lastpop = $P[$np-1] ; 109 | $d1 = $c1+1 ; 110 | $d2 = $c2+1 ; 111 | foreach $pop (@P) { 112 | $dfile = "$stem:$pop" ; 113 | push @T, $dfile ; 114 | print GG " \"$dfile\" using (\$$d1)*$xmul:(\$$d2)*$ymul " ; 115 | print GG "notitle " if (defined $nopoptitle) ; 116 | print GG "title \"$pop\" " unless (defined $nopoptitle) ; 117 | if (defined $COL{$pop}) { 118 | $color = $COL{$pop} ; 119 | print GG "lt rgb \"$color\" " ; 120 | } 121 | print GG ", \\\n" unless ($pop eq $lastpop) ; 122 | open (YY, ">$dfile") || die "can't open $dfile\n" ; 123 | foreach $line (@L) { 124 | next if ($line =~ /\#/) ; 125 | @Z = split " ", $line ; 126 | next unless (defined $Z[$popcol]) ; 127 | next unless ($Z[$popcol] eq $pop) ; 128 | print YY "$line\n" ; 129 | } 130 | close YY ; 131 | } 132 | print GG "\n" ; 133 | print GG "## " if ($postscmode) ; 134 | print GG "pause 9999\n" ; 135 | close GG ; 136 | 137 | if ($postscmode) { 138 | $psfile = "$stem.ps" ; 139 | 140 | if ($gnfile =~ /xtxt/) { 141 | $psfile = $gnfile ; 142 | $psfile =~ s/xtxt/ps/ ; 143 | } 144 | system "gnuplot < $gnfile > $psfile" ; 145 | system "/home/np29/bin/fixgreen $psfile" ; 146 | system "ps2pdf $psfile " ; 147 | } 148 | unlink (@T) unless $keepflag ; 149 | 150 | sub setcolor { 151 | my ($colorst) = @_ ; 152 | local ($cp, $pop, $color, @CP, $line) ; 153 | if (-r $colorst) { 154 | open (C1, $colorst) || die "can't open $colorst\n" ; 155 | foreach $line () { 156 | chomp $line ; 157 | ($pop, $color) = split " ", $line ; 158 | next if ($pop =~ /\#/) ; 159 | next unless (defined $color) ; 160 | print STDERR "setting color for $pop to $color\n" ; 161 | $COL{$pop} = $color ; 162 | } 163 | close C1 ; 164 | return ; 165 | } 166 | 167 | @CP = split " ", $colorst ; 168 | foreach $cp (@CP) { 169 | ($pop, $color) = split ":", $cp ; 170 | $COL{$pop} = $color ; 171 | } 172 | } 173 | 174 | sub usage { 175 | 176 | print "ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k] -c colorstringh [-m xmul] [-n ymul]\n" ; 177 | print "-i eigfile input file first col indiv-id last col population\n" ; 178 | print "## as output by smartpca in outputvecs \n" ; 179 | print "-c a:b a, b columns to plot. 1:2 would be common and leading 2 eigenvectors\n" ; 180 | print "-p pops Populations to plot. : delimited. eg -p Bantu:San:French\n" ; 181 | print "## pops can also be a filename. List populations 1 per line\n" ; 182 | print "[-s stem] stem will start various output files\n" ; 183 | print "[-o ofile] ofile will be gnuplot control file. Should have xtxt suffix\n"; 184 | print "[-x] make ps and pdf files\n" ; 185 | print "[-k] keep various intermediate files although -x set\n" ; 186 | print "## necessary if .xtxt file is to be hand edited\n" ; 187 | print "[-r colorstringpairs or colorstringfile]\n" ; 188 | print "[-g gstem] make gstem.xtxt gstem.ps gstem.pdf \n" ; 189 | print "[-y] put key at top right inside box (old mode)\n" ; 190 | print "[-t] title (legend)\n" ; 191 | 192 | print "The xtxt file is a gnuplot file and can be easily hand edited. Intermediate files 193 | needed if you want to make your own plot\n" ; 194 | 195 | } 196 | sub setpops { 197 | my ($pops) = @_ ; 198 | local (@a, $d, $b, $e) ; 199 | 200 | if (-e $pops) { 201 | open (FF1, $pops) || die "can't open $pops\n" ; 202 | @P = () ; 203 | foreach $line () { 204 | ($a) = split " ", $line ; 205 | next unless (defined $a) ; 206 | next if ($a =~ /\#/) ; 207 | push @P, $a ; 208 | } 209 | $out = join ":", @P ; 210 | print "## pops: $out\n" ; 211 | ($b, $d , $e) = fileparse($pops) ; 212 | return $b ; 213 | } 214 | @P = split $zsep, $pops ; 215 | return $pops ; 216 | 217 | } 218 | -------------------------------------------------------------------------------- /include/admutils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #define IDSIZE 40 5 | 6 | #ifndef ADMUTILS 7 | 8 | typedef struct 9 | { 10 | char ID[IDSIZE]; 11 | char *egroup; 12 | char gender; /* 'M' or 'F' */ 13 | double theta_mode; /* most likely theta on mesh */ 14 | double lambda_mode; /* mean of log10(lambda) from probability distribution */ 15 | 16 | double Xtheta_mode; /* most likely theta on mesh */ 17 | double Xlambda_mode; /* most likely lambda on mesh */ 18 | 19 | int idnum; 20 | int affstatus; /* affected status */ 21 | int ignore; /* YES => do not use */ 22 | int flag; 23 | double thetatrue; 24 | double Xthetatrue; 25 | double lambdatrue; 26 | double Xlambdatrue; 27 | double totgamms[3]; 28 | double totscore; 29 | double rawqval; 30 | double qval; 31 | } Indiv; 32 | 33 | typedef struct 34 | { 35 | char ID[IDSIZE]; 36 | int chrom; 37 | char cchrom[6]; 38 | double genpos; 39 | double physpos; 40 | double aa_cauc_freq; /* frequencies in ancestral pop to AA */ 41 | double aa_af_freq; 42 | double cauc_freq; /* frequencies of variant allele */ 43 | double af_freq; 44 | double cftrue; 45 | double aftrue; 46 | double aa_cftrue; 47 | double aa_aftrue; 48 | int markernum; /* marker number */ 49 | char *pbuff; 50 | char *ebuff; /* for random ethnic path */ 51 | int isfake; /* 1 if fake marker else 0 */ 52 | int isrfake; 53 | /* real marker treated as fake. Used for 2D scoring */ 54 | int ignore; 55 | int ngtypes; /* number of gtypes */ 56 | int *gtypes; 57 | int af_nn[2]; 58 | int cauc_nn[2]; 59 | double *modelscores; 60 | double *totmodelscores; 61 | double score; 62 | double weight; 63 | double estgenpos; 64 | double estdis; 65 | double dis; 66 | double esum; 67 | double lsum; 68 | double gpsum; 69 | double gpnum; 70 | void *pcupt; 71 | int tagnumber; 72 | char alleles[2]; 73 | int chimpfudge; 74 | } SNP; 75 | 76 | typedef struct 77 | { 78 | char ID[IDSIZE]; 79 | Indiv *father; 80 | Indiv *mother; 81 | Indiv *child; 82 | int findex; 83 | int mindex; 84 | int cindex; 85 | int base; 86 | } TRIO; 87 | // ?index into original Indiv array. base is work variable used in phasetrio to store init index in new array 88 | 89 | typedef struct 90 | { 91 | char gname[IDSIZE]; 92 | SNP **snpm; 93 | Indiv **indivm; 94 | int numsnps; 95 | int numindivs; 96 | int rlen; 97 | int fdes; 98 | int snpindex; 99 | unsigned char *buff; 100 | } genofile; 101 | 102 | typedef struct 103 | { 104 | double xd[4]; 105 | double xc[9]; 106 | double ap1; 107 | double ap2; 108 | double cp1; 109 | double cp2; 110 | double rpowersum; 111 | double crpowersum; 112 | double gammasum[2]; 113 | double gammanum[2]; 114 | int pubfmodern; 115 | } SNPMC; 116 | // gammasum for cases/controls 117 | #endif 118 | #define ADMUTILS 119 | 120 | void loadstats (FILE * statsfile, Indiv * indiv_array, int *numindivs); 121 | void loadXstats (FILE * Xstatsfile, Indiv * indiv_array, int numindivs, 122 | int *numloaded); 123 | 124 | void sett1 (double *tt, double theta, int numstates); 125 | void sett1r (double *tt, double theta, int numstate, double risks); 126 | void gettln (SNP * cupt, Indiv * indx, 127 | double *ptheta, double *plambda, int *pnumstates, int *pignore); 128 | 129 | void puttln (SNP * cupt, Indiv * indx, double theta, double lambda); 130 | 131 | 132 | /* UTILITY FUNCTIONS */ 133 | 134 | int countcol (char *fname); 135 | int countcolumns (FILE * fp); 136 | 137 | void fataly (const char *name); 138 | int compare_doubles (const void *a, const void *b); 139 | 140 | void pcheck (char *name, char x); 141 | void printm (double **M, int numstates); 142 | int numvalids (Indiv * indx, SNP ** snpmarkers, int fc, int lc); 143 | void gethpos (int *fc, int *lc, SNP ** snpm, int numsnps, 144 | int xchrom, int lo, int hi); 145 | int numvalidgtypes (SNP * cupt); 146 | double malefreq (Indiv ** indivmarkers, int numindivs); 147 | int isimatch (int a, int b); 148 | void makedir (char *dirname); 149 | int indxindex (char **namelist, int len, char *strid); 150 | int indindex (Indiv ** indivmarkers, int numindivs, char *indid); 151 | int snpindex (SNP ** snpmarkers, int numsnps, char *snpid); 152 | void inddupcheck (Indiv ** indivmarkers, int numindivs); 153 | void freesnpindex (); 154 | int ignoresnp (SNP * cupt); 155 | double entrop (double *a, int n); 156 | double xxlog2 (double t); 157 | void testnan (double *a, int n); 158 | void hap2dip (SNP * cupt); 159 | void flipalleles (SNP * cupt); 160 | void flipalleles_phased (SNP * cupt); 161 | int getgtypes (SNP * cupt, int k); 162 | void putgtypes (SNP * cupt, int k, int val); 163 | int getep (SNP * cupt, int k); 164 | void putep (SNP * cupt, int k, int val); 165 | int hasharr (char **xarr, int nxarr); 166 | void wbuff (unsigned char *buff, int num, int g); 167 | int rbuff (unsigned char *buff, int num); 168 | int ridfile (char *fname); 169 | double hwcheck (SNP * cupt, double *cc); 170 | double hwcheckx (SNP * cupt, Indiv ** indm, double *cc); 171 | void cntit (double *xc, SNP * cupt1, SNP * cupt2); 172 | int setid2pops (char *idpopstring, Indiv ** indmarkers, int numindivs) ; 173 | 174 | // dup routines 175 | void setfastdupnum (int num); 176 | void setfastdupthresh (double thresh, double kill); 177 | void killxhets (SNP ** snpmarkers, Indiv ** indivmarkers, int numsnps, 178 | int numindivs); 179 | void fastdupcheck (SNP ** snpmarkers, Indiv ** indivmarkers, int numsnps, 180 | int numindivs); 181 | int grabgtypes (int *gtypes, SNP * cupt, int numindivs); 182 | int kcode (int *w, int len, int base); 183 | void cdup (SNP ** snpm, Indiv ** indm, int nsnp, int *buff, int lbuff); 184 | void printdup (SNP ** snpm, int nsnp, Indiv * inda, Indiv * indb, int nmatch, 185 | int nnomatch); 186 | void killdup (Indiv * inda, Indiv * indb, SNP ** snpm, int nsnp); 187 | double kurtosis (double *a, int n); 188 | int getlist (char *name, char **list); 189 | void printvers (char *progname, char *vers); 190 | int numvalidind (Indiv ** indivmarkers, int numind); 191 | void numvalidgtallind (int *x, SNP ** snpm, int numsnps, int numind); 192 | int numvalidgtind (SNP ** snpm, int numsnps, int ind); 193 | int numvalidgt (Indiv ** indivmarkers, SNP * cupt); 194 | int numvalidgtx (Indiv ** indivmarkers, SNP * cupt, int affst); 195 | int isxmale (SNP * cupt, Indiv * indx); 196 | 197 | void printmatz (double *ww, char **eglist, int n); 198 | void printmatz5 (double *ww, char **eglist, int n); 199 | void printmatz10 (double *ww, char **eglist, int n); 200 | char *get3 (char *ss); 201 | char *getshort (char *ss, int n); 202 | 203 | 204 | #undef max 205 | #define max(A,B) ((A) > (B) ? (A) : (B)) 206 | 207 | #define MAXNUMR 200 208 | // max number models 209 | 210 | #define CNULL '\0' 211 | -------------------------------------------------------------------------------- /include/mcio.h: -------------------------------------------------------------------------------- 1 | #ifndef _MCIO_ 2 | #define _MCIO_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #define MAXSTR 512 14 | #define LONGSTR 10000 15 | #define MAXFF 200 16 | #define MAXCH 100 17 | #define MTCHROM 90 18 | #define XYCHROM 91 19 | #define BADCHROM 99 20 | #define GDISMUL 1000000 21 | // multiplier for gdis to make integer for sort 22 | 23 | enum outputmodetype { 24 | ANCESTRYMAP, 25 | EIGENSTRAT, 26 | PED, 27 | PACKEDPED, 28 | PACKEDANCESTRYMAP } ; 29 | 30 | 31 | typedef struct { 32 | char ID[IDSIZE]; 33 | double gpos ; 34 | double ppos ; 35 | int chrom ; 36 | char cchrom[6] ; 37 | int nn[4] ; 38 | int ignore ; 39 | int isrfake ; 40 | char alleles[2] ; 41 | int inputrow ; 42 | int cuptnum ; 43 | int chimpfudge ; 44 | } SNPDATA ; 45 | 46 | 47 | int numfakes(SNPDATA **snpraw, int *snpindx, int nreal, double spacing) ; 48 | double nextmesh(double val, double spacing) ; 49 | double interp (double l, double r, double x, double al, double ar) ; 50 | 51 | int 52 | loadsnps(SNP **snpm, SNPDATA **snpraw, 53 | int *snpindx, int nreal, double spacing, int *numignore) ; 54 | 55 | int readsnpdata(SNPDATA **snpraw, char *fname) ; 56 | int readinddata(Indiv **indivmarkers, char *fname) ; 57 | int readindpeddata(Indiv **indivmarkers, char *fname) ; 58 | void pedname(char *name, char *sx0, char *sx1) ; 59 | 60 | int readtldata(Indiv **indivmarkers, int numind, char *fname) ; 61 | int readindval(Indiv **indivmarkers, int numind, char *fname) ; 62 | int readfreqdata(SNP **snpm, int numsnps, char *fname) ; 63 | void clearsnp(SNP *cupt) ; 64 | int rmindivs(SNP **snpm, int numsnps, Indiv **indivmarkers, int numindivs) ; 65 | int rmsnps(SNP **snpm, int numsnps, char *deletesnpoutname) ; 66 | void clearind(Indiv **indm, int numind) ; 67 | void cleartg(Indiv **indm, int nind) ; 68 | 69 | double mknn(int *nn, int n0, int n1) ; 70 | void clearsnpord() ; 71 | int getsnps(char *snpfname, SNP ***snpmarkpt, double spacing, 72 | char *badsnpname, int *nignore, int numrisks) ; 73 | int getsizex(char *fname) ; 74 | int getindivs(char *indivfname, Indiv ***indmarkpt) ; 75 | 76 | void setfamilypopnames(int fpop) ; 77 | int setstatus(Indiv **indm, int numindivs, char *smatch) ; 78 | int setstatusv(Indiv **indm, int numindivs, char *smatch, int val) ; 79 | int setstatuslist(Indiv **indm, int numindivs, char **smatchlist, int slen) ; 80 | 81 | long getgenos(char *genoname, SNP **snpmarkers, Indiv **indivmarkers, 82 | int numsnps, int numindivs, int nignore) ; 83 | void getgenos_list(char *genotypelist, SNP **snpmarkers, Indiv **indivmarkers, 84 | int numsnps, int numindivs, int nignore) ; 85 | void printsnps(char *snpoutfilename, SNP **snpm, int num, 86 | Indiv **indm, int printfake, int printvalids) ; 87 | int checkxval(SNP *cupt, Indiv *indx, int val) ; 88 | void printdata(char *genooutfilename, char *indoutfilename, 89 | SNP **snpm, Indiv **indiv, int numsnps,int numind, int packmode); 90 | int readgdata(Indiv **indivmarkers, int numind, char *gname) ; 91 | int numvalidind(Indiv **indivmarkers, int numind) ; 92 | int numvalidgtind(SNP **snpm, int numsnps, int ind) ; 93 | int numvalidgt(Indiv **indivmarkers, SNP *cupt) ; 94 | int numvalidgtx(Indiv **indivmarkers, SNP *cupt, int affst) ; 95 | int getweights(char *fname, SNP **snpm, int numsnps) ; 96 | int getindvals (char *fname, Indiv ** indivmarkers, int numindivs) ; 97 | void outpack(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind) ; 98 | int ispack(char *gname) ; 99 | int iseigenstrat(char *gname) ; 100 | void inpack(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind) ; 101 | int inpack2(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind) ; 102 | int ineigenstrat(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind) ; 103 | void setepath(SNP **snpm, int n) ; 104 | void clearepath(char *xpack) ; 105 | long bigread(int fdes, char *packg, long numbytes) ; 106 | 107 | // pedfile support 108 | int getpedgenos(char *genoname, SNP **snpmarkers, Indiv **indivmarkers, 109 | int numsnps, int numindivs, int nignore) ; 110 | void genopedcnt(char *genoname, int **gcounts, int nsnp) ; 111 | 112 | int pedval(char *sx) ; 113 | int xpedval(char c) ; 114 | int ptoachrom(char *ss) ; 115 | 116 | void setgref(int **gcounts, int nsnp, int *gvar, int *gref) ; 117 | void cleargdata(SNP **snpmarkers, int numsnps, int numindivs) ; 118 | void setgenotypename(char **gname, char *iname) ; 119 | void settersemode(int mode) ; 120 | 121 | void dobadsnps(SNPDATA **snpraw, int nreal, char *badsnpname) ; 122 | int snprawindex(SNPDATA **snpraw, int nreal, char *sname) ; 123 | int readsnpmapdata(SNPDATA **snpraw, char *fname) ; 124 | int checkfake(char *ss) ; 125 | void setbadpedignore() ; 126 | int setsdpos( SNPDATA *sdpt, int pos) ; 127 | 128 | void 129 | outeigenstrat(char *snpname, char *indname, char *gname, 130 | SNP **snpm, Indiv **indiv, int numsnps, int numind) ; 131 | 132 | void 133 | outped(char *snpname, char *indname, char *gname, 134 | SNP **snpm, Indiv **indiv, int numsnps, int numind, int ogmode) ; 135 | 136 | void 137 | outpackped(char *snpname, char *indname, char *gname, SNP **snpm, Indiv **indiv, 138 | int numsnps, int numind, int ogmode) ; 139 | 140 | void setbedbuff(char *buff, int *gtypes, int numind) ; 141 | int bedval(int g) ; 142 | int str2chrom(char *ss) ; 143 | 144 | void outindped(char *indname, Indiv **indiv, int numind, int ogmode) ; 145 | 146 | void 147 | printmap(char *snpname, SNP **snpm, int numsnps, Indiv **indiv) ; 148 | 149 | int maxlinelength(char *fname) ; 150 | int checksize(int numindivs, int numsnps, enum outputmodetype outputmode) ; 151 | 152 | void setomode(enum outputmodetype *outmode, char *omode) ; 153 | 154 | void 155 | outfiles(char *snpname, char *indname, char *gname, SNP **snpm, 156 | Indiv **indiv, int numsnps, int numind, int packem, int ogmode) ; 157 | 158 | void snpdecimate(SNP **snpm, int nsnp, int decim, int mindis, int maxdis) ; 159 | void decimate(SNP **cbuff, int n, int decim, int mindis, int maxdis) ; 160 | int vvadjust(double *cc, int n, double *pmean) ; 161 | int killhir2(SNP **snpm, int numsnps, int numind, double physlim, double genlim, double rhothresh) ; 162 | void freecupt(SNP **cupt) ; 163 | void freeped() ; 164 | void cntpops(int *count, Indiv **indm, int numindivs, char **eglist, int numeg) ; 165 | void printalleles(SNP *cupt, FILE *fff) ; 166 | char *getpackgenos() ; 167 | void clearpackgenos() ; 168 | void setchr(int mode) ; 169 | void setchimpmode(int mode) ; 170 | 171 | int genoopenit(genofile **gfile, char *geno2name, SNP **snp2m, 172 | Indiv **indiv2m, int numsnp2, int numindiv2, int nignore) ; 173 | int genoreadit(genofile *gfile, SNP **pcupt) ; 174 | void genocloseit(genofile *gfile) ; 175 | 176 | void putped(int num) ; 177 | void getped(int num) ; 178 | 179 | void logdeletedsnp(char *snpname, char *cmnt, char *deletesnpoutname); 180 | void sortsnps(SNP **snpa, SNP **snpb, int n) ; 181 | void setpordercheck (int mode) ; 182 | void putsnpordered(int mode) ; 183 | int getsnpordered() ; 184 | void ckdup(char **eglist, int n) ; 185 | 186 | 187 | #endif 188 | -------------------------------------------------------------------------------- /src/eigensrc/eigx.c: -------------------------------------------------------------------------------- 1 | // LAPACK-using version of eigensrc/eigx.f, easier to build on OS X 2 | // Christopher Chang (chrchang@alumni.caltech.edu), BGI Cognitive Genomics Lab 3 | 4 | #include 5 | #include 6 | 7 | #ifdef __APPLE__ 8 | #include 9 | #else 10 | 11 | #if __LP64__ 12 | typedef int __CLPK_integer; 13 | #else 14 | typedef long int __CLPK_integer; 15 | #endif 16 | typedef double __CLPK_doublereal; 17 | 18 | int dspev_ (char *jobz, char *uplo, __CLPK_integer * n, 19 | __CLPK_doublereal * ap, __CLPK_doublereal * w, 20 | __CLPK_doublereal * z__, __CLPK_integer * ldz, 21 | __CLPK_doublereal * work, __CLPK_integer * info); 22 | 23 | int dpotrf_ (char *uplo, __CLPK_integer * n, __CLPK_doublereal * a, 24 | __CLPK_integer * lda, __CLPK_integer * info); 25 | 26 | int dgetrf_ (__CLPK_integer * m, __CLPK_integer * n, __CLPK_doublereal * a, 27 | __CLPK_integer * lda, __CLPK_integer * ipiv, 28 | __CLPK_integer * info); 29 | 30 | int dgetri_ (__CLPK_integer * n, __CLPK_doublereal * a, __CLPK_integer * lda, 31 | __CLPK_integer * ipiv, __CLPK_doublereal * work, 32 | __CLPK_integer * lwork, __CLPK_integer * info); 33 | 34 | int dgetrs_ (char *trans, __CLPK_integer * n, __CLPK_integer * nrhs, 35 | __CLPK_doublereal * a, __CLPK_integer * lda, 36 | __CLPK_integer * ipiv, __CLPK_doublereal * b, 37 | __CLPK_integer * ldb, __CLPK_integer * info); 38 | 39 | int dsygv_ (__CLPK_integer * itype, char *jobz, char *uplo, 40 | __CLPK_integer * n, __CLPK_doublereal * a, __CLPK_integer * lda, 41 | __CLPK_doublereal * b, __CLPK_integer * ldb, 42 | __CLPK_doublereal * w, __CLPK_doublereal * work, 43 | __CLPK_integer * lwork, __CLPK_integer * info); 44 | #endif // end !__APPLE__ 45 | 46 | void 47 | mem_error () 48 | { 49 | fprintf (stderr, "CM\n"); 50 | exit (1); 51 | } 52 | 53 | void 54 | inverse_error (char *procname, int info) 55 | { 56 | if (info < 0) { 57 | fprintf (stderr, "error (%s): illegal argument %d\n", procname, -info); 58 | } 59 | else { 60 | fprintf (stderr, "error (%s): singular matrix %d\n", procname, info); 61 | } 62 | exit (1); 63 | } 64 | 65 | void 66 | eigx_ (double *pmat, double *ev, __CLPK_integer * n) 67 | { 68 | char jobz = 'N'; 69 | char uplo = 'L'; 70 | __CLPK_integer ldz = *n; 71 | __CLPK_integer info; 72 | double *z; 73 | double *work; 74 | z = (double *) malloc (ldz * ldz * sizeof (double)); 75 | if (!z) { 76 | mem_error (); 77 | } 78 | work = (double *) malloc (3 * ldz * sizeof (double)); 79 | if (!work) { 80 | free (z); 81 | mem_error (); 82 | } 83 | dspev_ (&jobz, &uplo, n, pmat, ev, z, &ldz, work, &info); 84 | free (z); 85 | free (work); 86 | if (info) { 87 | #if __LP64__ 88 | fprintf (stderr, "INFO: %d\n", info); 89 | #else 90 | fprintf (stderr, "INFO: %ld\n", info); 91 | #endif 92 | exit (1); 93 | } 94 | } 95 | 96 | void 97 | eigxv_ (double *pmat, double *eval, double *evec, __CLPK_integer * n) 98 | { 99 | char jobz = 'V'; 100 | char uplo = 'L'; 101 | __CLPK_integer ldz = *n; 102 | __CLPK_integer info; 103 | double *work = (double *) malloc (3 * ldz * sizeof (double)); 104 | if (!work) { 105 | mem_error (); 106 | } 107 | dspev_ (&jobz, &uplo, n, pmat, eval, evec, &ldz, work, &info); 108 | free (work); 109 | if (info) { 110 | #if __LP64__ 111 | fprintf (stderr, "INFO: %d\n", info); 112 | #else 113 | fprintf (stderr, "INFO: %ld\n", info); 114 | #endif 115 | exit (1); 116 | } 117 | } 118 | 119 | void 120 | cdc_ (double *pmat, __CLPK_integer * n) 121 | { 122 | char uplo = 'L'; 123 | __CLPK_integer lda = *n; 124 | __CLPK_integer info; 125 | dpotrf_ (&uplo, n, pmat, &lda, &info); 126 | if (info) { 127 | if (info < 0) { 128 | #if __LP64__ 129 | fprintf (stderr, "error (CDC): illegal argument %d\n", -info); 130 | #else 131 | fprintf (stderr, "error (CDC): illegal argument %ld\n", -info); 132 | #endif 133 | } 134 | else { 135 | #if __LP64__ 136 | fprintf (stderr, "error (CDC): minor not positive definite %d\n", info); 137 | #else 138 | fprintf (stderr, "error (CDC): minor not positive definite %ld\n", 139 | info); 140 | #endif 141 | } 142 | exit (1); 143 | } 144 | } 145 | 146 | void 147 | inverse_ (double *pmat, __CLPK_integer * n) 148 | { 149 | __CLPK_integer lwork = (*n) * (*n); 150 | __CLPK_integer info; 151 | __CLPK_integer *ipiv; 152 | double *work; 153 | ipiv = (__CLPK_integer *) malloc ((*n) * sizeof (__CLPK_integer)); 154 | if (!ipiv) { 155 | mem_error (); 156 | } 157 | work = (double *) malloc (lwork * sizeof (double)); 158 | if (!work) { 159 | free (ipiv); 160 | mem_error (); 161 | } 162 | dgetrf_ (n, n, pmat, n, ipiv, &info); 163 | if (info) { 164 | free (ipiv); 165 | free (work); 166 | inverse_error ("INVERSE", info); 167 | exit (1); 168 | } 169 | dgetri_ (n, pmat, n, ipiv, work, &lwork, &info); 170 | free (ipiv); 171 | free (work); 172 | if (info) { 173 | inverse_error ("INVERSE", info); 174 | } 175 | } 176 | 177 | void 178 | solve_ (double *pmat, double *v, __CLPK_integer * n) 179 | { 180 | __CLPK_integer ldb = *n; 181 | char trans = 'N'; 182 | __CLPK_integer nrhs = 1; 183 | double *work; 184 | __CLPK_integer *ipiv; 185 | __CLPK_integer info; 186 | ipiv = (__CLPK_integer *) malloc (ldb * sizeof (__CLPK_integer)); 187 | if (!ipiv) { 188 | mem_error (); 189 | } 190 | work = (double *) malloc (ldb * ldb * sizeof (double)); 191 | if (!work) { 192 | free (ipiv); 193 | mem_error (); 194 | } 195 | dgetrf_ (n, n, pmat, n, ipiv, &info); 196 | if (info) { 197 | free (ipiv); 198 | free (work); 199 | inverse_error ("SOLVE", info); 200 | } 201 | dgetrs_ (&trans, n, &nrhs, pmat, n, ipiv, v, &ldb, &info); 202 | free (ipiv); 203 | free (work); 204 | if (info < 0) { 205 | inverse_error ("SOLVE", info); 206 | } 207 | } 208 | 209 | void 210 | geneigsolve_ (double *pmat, double *qmat, double *eval, __CLPK_integer * n) 211 | { 212 | __CLPK_integer lwork = (*n) * (*n); 213 | double *work = (double *) malloc (lwork * sizeof (double)); 214 | __CLPK_integer wood_elf = 1; // Sameer Merchant memorial temporary variable 215 | __CLPK_integer info; 216 | if (!work) { 217 | mem_error (); 218 | } 219 | dsygv_ (&wood_elf, "V", "U", n, pmat, n, qmat, n, eval, work, &lwork, 220 | &info); 221 | free (work); 222 | if (info && (info <= 2 * (*n))) { 223 | if (info < 0) { 224 | #if __LP64__ 225 | fprintf (stderr, "error (GENEIGSOLVE): illegal argument %d\n", -info); 226 | #else 227 | fprintf (stderr, "error (GENEIGSOLVE): illegal argument %ld\n", -info); 228 | #endif 229 | } 230 | else if (info <= (*n)) { 231 | #if __LP64__ 232 | fprintf (stderr, "error (GENEIGSOLVE): failure to converge %d\n", info); 233 | #else 234 | fprintf (stderr, "error (GENEIGSOLVE): failure to converge %ld\n", 235 | info); 236 | #endif 237 | } 238 | else { 239 | #if __LP64__ 240 | fprintf (stderr, "error (GENEIGSOLVE): not positive definite %d\n", 241 | info); 242 | #else 243 | fprintf (stderr, "error (GENEIGSOLVE): not positive definite %ld\n", 244 | info); 245 | #endif 246 | } 247 | exit (1); 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /src/nicksrc/sortit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "strsubs.h" 6 | #include "sortit.h" 7 | #include "vsubs.h" 8 | 9 | /** 10 | a simple set of sort routines 11 | */ 12 | 13 | static double *ttt; 14 | static long *lttt; 15 | static int *ittt; 16 | static int **pttt; 17 | static int plen = 0; 18 | static int *porder = NULL; 19 | 20 | void 21 | setorder (int *pp, int rlen) 22 | { 23 | int *tt; 24 | 25 | if (plen > 0) { 26 | if (porder != NULL) 27 | free (porder); 28 | } 29 | 30 | if (pp == NULL) { 31 | porder = NULL; 32 | plen = rlen; 33 | return; 34 | } 35 | ZALLOC (porder, rlen, int); 36 | ZALLOC (tt, rlen, int); 37 | copyiarr (pp, tt, rlen); 38 | isortit (tt, porder, rlen); 39 | free (tt); 40 | plen = rlen; 41 | } 42 | 43 | double 44 | median (double *aa, int len) 45 | // should be O(len) algorithm 46 | { 47 | double *b, y; 48 | int t, x, a, n; 49 | 50 | ZALLOC (b, len, double); 51 | n = 0; 52 | for (a = 0; a < len; ++a) { 53 | y = aa[a]; 54 | if (isfinite (y)) { 55 | b[n] = y; 56 | ++n; 57 | } 58 | } 59 | if (n == 0) 60 | fatalx ("(median) no valids\n"); 61 | if (n == 1) 62 | return b[0]; 63 | if (n == 2) 64 | return 0.5 * (b[0] + b[1]); 65 | sortit (b, NULL, n); 66 | t = n % 2; 67 | x = n / 2; 68 | y = b[x]; 69 | if (t == 0) 70 | y = 0.5 * (b[x] + b[x - 1]); 71 | 72 | free (b); 73 | // printf("zzmed: %d %d %d %9.3f\n", len, n, x, y) ; 74 | return y; 75 | 76 | 77 | } 78 | 79 | void 80 | sortit (double *a, int *ind, int len) 81 | { 82 | int i, k; 83 | int *inda; 84 | 85 | if (len == 0) 86 | fatalx ("(sortit) len = 0\n"); 87 | ZALLOC (ttt, len, double); 88 | ZALLOC (inda, len, int); 89 | 90 | for (i = 0; i < len; i++) { 91 | inda[i] = i; 92 | } 93 | 94 | copyarr (a, ttt, len); 95 | qsort ((int *) inda, len, sizeof (int), 96 | (int (*)(const void *, const void *)) compit); 97 | 98 | for (i = 0; i < len; i++) { 99 | k = inda[i]; 100 | a[i] = ttt[k]; 101 | } 102 | free (ttt); 103 | if (ind != NULL) 104 | copyiarr (inda, ind, len); 105 | free (inda); 106 | } 107 | 108 | int 109 | compit (int *a1, int *a2) 110 | { 111 | if (ttt[*a1] < ttt[*a2]) 112 | return -1; 113 | if (ttt[*a1] > ttt[*a2]) 114 | return 1; 115 | return 0; 116 | } 117 | 118 | void 119 | lsortit (long *a, int *ind, int len) 120 | { 121 | int i, k; 122 | int *inda; 123 | 124 | if (len == 0) 125 | fatalx ("(lsortit) len = 0\n"); 126 | ZALLOC (lttt, len, long); 127 | ZALLOC (inda, len, int); 128 | 129 | for (i = 0; i < len; i++) { 130 | inda[i] = i; 131 | } 132 | 133 | copylarr (a, lttt, len); 134 | qsort ((int *) inda, len, sizeof (int), 135 | (int (*)(const void *, const void *)) lcompit); 136 | 137 | for (i = 0; i < len; i++) { 138 | k = inda[i]; 139 | a[i] = lttt[k]; 140 | } 141 | free (lttt); 142 | if (ind != NULL) 143 | copyiarr (inda, ind, len); 144 | free (inda); 145 | } 146 | 147 | void 148 | isortit (int *a, int *ind, int len) 149 | { 150 | int i, k; 151 | int *inda; 152 | 153 | if (len == 0) 154 | fatalx ("(isortit) len = 0\n"); 155 | ZALLOC (ittt, len, int); 156 | ZALLOC (inda, len, int); 157 | 158 | for (i = 0; i < len; i++) { 159 | inda[i] = i; 160 | } 161 | 162 | copyiarr (a, ittt, len); 163 | qsort ((int *) inda, len, sizeof (int), 164 | (int (*)(const void *, const void *)) icompit); 165 | 166 | for (i = 0; i < len; i++) { 167 | k = inda[i]; 168 | a[i] = ittt[k]; 169 | } 170 | free (ittt); 171 | if (ind != NULL) 172 | copyiarr (inda, ind, len); 173 | free (inda); 174 | } 175 | 176 | int 177 | lcompit (int *a1, int *a2) 178 | { 179 | if (lttt[*a1] < lttt[*a2]) 180 | return -1; 181 | if (lttt[*a1] > lttt[*a2]) 182 | return 1; 183 | return 0; 184 | } 185 | 186 | int 187 | icompit (int *a1, int *a2) 188 | { 189 | if (ittt[*a1] < ittt[*a2]) 190 | return -1; 191 | if (ittt[*a1] > ittt[*a2]) 192 | return 1; 193 | return 0; 194 | } 195 | 196 | void 197 | invperm (int *a, int *b, int n) 198 | { 199 | 200 | /** 201 | a, b can be same 202 | */ 203 | int i, j; 204 | int *x; 205 | 206 | if (n == 0) 207 | return; 208 | ZALLOC (x, n, int); 209 | 210 | ivclear (x, -1, n); 211 | for (i = 0; i < n; i++) { 212 | j = b[i]; 213 | x[j] = i; 214 | } 215 | copyiarr (x, a, n); 216 | free (x); 217 | } 218 | 219 | void 220 | ipsortit (int **a, int *ind, int len, int rlen) 221 | { 222 | if (len==0) return ; 223 | ipsortitp (a, ind, len, rlen, NULL); 224 | 225 | } 226 | 227 | void 228 | ipsortitp (int **a, int *ind, int len, int rlen, int *order) 229 | 230 | /** 231 | sort integer array pointers 232 | rows of array are sorted in lexicographical order 233 | 234 | compiarr can be called outside the sort 235 | */ 236 | { 237 | int i, k; 238 | int *inda; 239 | 240 | if (len == 0) 241 | fatalx ("(ipsortit) len = 0\n"); 242 | ZALLOC (pttt, len, int *); 243 | ZALLOC (inda, len, int); 244 | 245 | setorder (order, rlen); // order defines order as sorted in ascending order. 246 | 247 | for (i = 0; i < len; i++) { 248 | if (a[i] == NULL) 249 | fatalx ("(ipsortit) array pointer %d NULL\n", i); 250 | inda[i] = i; 251 | } 252 | 253 | copyiparr (a, pttt, len); 254 | qsort ((int *) inda, len, 255 | sizeof (int), (int (*)(const void *, const void *)) ipcompit); 256 | 257 | for (i = 0; i < len; i++) { 258 | k = inda[i]; 259 | a[i] = pttt[k]; 260 | // note that this just reorders pointers. 261 | } 262 | if (ind != NULL) 263 | copyiarr (inda, ind, len); 264 | free (inda); 265 | free (pttt); 266 | } 267 | 268 | int 269 | ipcompit (int *a1, int *a2) 270 | { 271 | int l; 272 | l = compiarr (pttt[*a1], pttt[*a2], plen); 273 | return l; 274 | } 275 | 276 | int 277 | compiarr (int *a, int *b, int len) 278 | { 279 | int i, k; 280 | for (i = 0; i < len; i++) { 281 | k = i; 282 | if (porder != NULL) 283 | k = porder[i]; 284 | if (a[k] < b[k]) 285 | return -1; 286 | if (a[k] > b[k]) 287 | return 1; 288 | } 289 | return 0; 290 | } 291 | 292 | int 293 | comparr (double *a, double *b, int len) 294 | { 295 | int i, k; 296 | for (i = 0; i < len; i++) { 297 | k = i; 298 | if (porder != NULL) 299 | k = porder[i]; 300 | if (a[k] < b[k]) 301 | return -1; 302 | if (a[k] > b[k]) 303 | return 1; 304 | } 305 | return 0; 306 | } 307 | 308 | 309 | void 310 | mkirank (int *rank, int *xin, int n) 311 | // faster to call isortit 312 | { 313 | double *a; 314 | 315 | ZALLOC (a, n, double); 316 | floatit (a, xin, n); 317 | mkrank (rank, a, n); 318 | free (a); 319 | } 320 | 321 | void 322 | mkrank (int *rank, double *xin, int n) 323 | 324 | /** rank 0:n-1 325 | largest element k has rank[k] = 0, smallest, rank[k] = n-1 326 | */ 327 | { 328 | int i; 329 | double *a; 330 | int *ind; 331 | 332 | ZALLOC (a, n, double); 333 | ZALLOC (ind, n, int); 334 | 335 | vst (a, xin, -1.0, n); 336 | sortit (a, ind, n); 337 | 338 | for (i = 0; i < n; i++) { 339 | rank[ind[i]] = i; 340 | } 341 | 342 | free (a); 343 | free (ind); 344 | 345 | 346 | } 347 | -------------------------------------------------------------------------------- /src/baseprog.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "admutils.h" 14 | #include "mcio.h" 15 | #include "mcmcpars.h" 16 | #include "egsubs.h" 17 | #include "exclude.h" 18 | 19 | 20 | #define WVERSION "420" 21 | 22 | // badpairsname added 23 | 24 | /** 25 | does nothing but read the data 26 | and print snps 27 | sometimes a good place to start !! 28 | 29 | New I/O (mcio.c) added 30 | New admutils (snpindex hash) 31 | mcio bug fixed (large files) 32 | */ 33 | 34 | 35 | #define MAXFL 50 36 | #define MAXSTR 512 37 | 38 | extern int packmode; 39 | 40 | char *trashdir = "/var/tmp"; 41 | extern int verbose; 42 | int qtmode = NO; 43 | Indiv **indivmarkers; 44 | SNP **snpmarkers; 45 | int numsnps, numindivs; 46 | 47 | char *genotypename = NULL; 48 | char *snpname = NULL; 49 | char *genooutfilename = NULL; 50 | char *indoutfilename = NULL; 51 | char *indivname = NULL; 52 | char *badsnpname = NULL; 53 | char *goodsnpname = NULL; 54 | char *badpairsname = NULL; 55 | char *markername = NULL; 56 | char *idname = NULL; 57 | 58 | char *outputname = NULL; 59 | FILE *ofile; 60 | 61 | double fakespacing = 0.0; 62 | 63 | char unknowngender = 'U'; 64 | 65 | void readcommands (int argc, char **argv); 66 | void dophyscheck (SNP ** snpm, int numsnps); 67 | 68 | int 69 | main (int argc, char **argv) 70 | { 71 | 72 | int i, j, k, g; 73 | SNP *cupt; 74 | Indiv *indx; 75 | int ch1, ch2; 76 | 77 | int numvind, nignore, numrisks = 1; 78 | int markernum, idnum; 79 | 80 | ofile = stdout; 81 | packmode = YES; 82 | readcommands (argc, argv); 83 | if (indivname == NULL) { 84 | printf ("no indivname\n"); 85 | return 0; 86 | } 87 | if (outputname != NULL) 88 | openit (outputname, &ofile, "w"); 89 | 90 | numsnps = 91 | getsnps (snpname, &snpmarkers, fakespacing, badsnpname, &nignore, 92 | numrisks); 93 | 94 | // fakespacing 0.0 (default) 95 | 96 | numindivs = getindivs (indivname, &indivmarkers); 97 | setstatus (indivmarkers, numindivs, "Case"); 98 | 99 | setgenotypename (&genotypename, indivname); 100 | 101 | printf ("genotypename: %s\n", genotypename); 102 | 103 | if (genotypename != NULL) { 104 | getgenos (genotypename, snpmarkers, indivmarkers, 105 | numsnps, numindivs, nignore); 106 | 107 | /** 108 | if (badpairsname != NULL) { 109 | loadbadpsc(snpmarkers, numsnps, NO, goodsnpname) ; 110 | dobadpairs(badpairsname, snpmarkers, numsnps) ; 111 | } 112 | */ 113 | } 114 | dophyscheck (snpmarkers, numsnps); 115 | 116 | numvind = numvalidind (indivmarkers, numindivs); 117 | printf ("\n\n"); 118 | printf ("numindivs: %d valid: %d numsnps: %d nignore: %d\n", 119 | numindivs, numvind, numsnps, nignore); 120 | 121 | if (verbose) { 122 | for (i = 0; i < numindivs; ++i) { 123 | indx = indivmarkers[i]; 124 | printf ("%20s ", indx->ID); 125 | for (j = 0; j < numsnps; ++j) { 126 | cupt = snpmarkers[j]; 127 | if (cupt->ignore) 128 | continue; 129 | g = getgtypes (cupt, i); 130 | if (g < 0) 131 | g = 9; 132 | printf ("%1d", g); 133 | } 134 | printf (" %20s", indx->egroup); 135 | printnl (); 136 | } 137 | } 138 | // numsnps includes fakes 139 | 140 | if (markername != NULL) { 141 | markernum = snpindex (snpmarkers, numsnps, markername); 142 | if (markernum < 0) 143 | fatalx ("markername %s not found\n", markername); 144 | cupt = snpmarkers[markernum]; 145 | printf ("markername: %s %d %9.3f %12.0f\n", 146 | cupt->ID, cupt->chrom, cupt->genpos, cupt->physpos); 147 | for (i = 0; i < numindivs; ++i) { 148 | indx = indivmarkers[i]; 149 | g = getgtypes (cupt, i); 150 | printf ("%20s %20s %2d\n", cupt->ID, indx->ID, g); 151 | } 152 | } 153 | 154 | if (idname != NULL) { 155 | idnum = indindex (indivmarkers, numindivs, idname); 156 | if (idnum < 0) 157 | fatalx ("idname %s not found\n", idname); 158 | indx = indivmarkers[idnum]; 159 | printf ("idname: %20s %c %20s\n", 160 | indx->ID, indx->gender, indx->egroup); 161 | for (j = 0; j < numsnps; ++j) { 162 | cupt = snpmarkers[j]; 163 | if (cupt->ignore) 164 | continue; 165 | g = getgtypes (cupt, idnum); 166 | printf ("%20s %20s %2d", cupt->ID, indx->ID, g); 167 | printf (" %3d %12.0f", cupt->chrom, cupt->physpos); 168 | printf (" %c %c", cupt->alleles[0], cupt->alleles[1]); 169 | printnl (); 170 | } 171 | } 172 | 173 | /** 174 | if (genotypename != NULL) { 175 | printdata(genooutfilename, indoutfilename, snpmarkers, indivmarkers, numsnps, numindivs, NO) ; 176 | } 177 | */ 178 | 179 | printf ("##end of run\n"); 180 | return 0; 181 | } 182 | 183 | void 184 | readcommands (int argc, char **argv) 185 | { 186 | int i, haploid = 0; 187 | char *parname = NULL; 188 | phandle *ph; 189 | char str[5000]; 190 | char *tempname; 191 | int n; 192 | 193 | while ((i = getopt (argc, argv, "p:vV")) != -1) { 194 | 195 | switch (i) { 196 | 197 | case 'p': 198 | parname = strdup (optarg); 199 | break; 200 | 201 | case 'v': 202 | printf ("version: %s\n", WVERSION); 203 | break; 204 | 205 | case 'V': 206 | verbose = YES; 207 | break; 208 | 209 | case '?': 210 | printf ("Usage: bad params.... \n"); 211 | fatalx ("bad params\n"); 212 | } 213 | } 214 | 215 | 216 | pcheck (parname, 'p'); 217 | printf ("parameter file: %s\n", parname); 218 | ph = openpars (parname); 219 | dostrsub (ph); 220 | 221 | /** 222 | DIR2: /fg/nfiles/admixdata/ms2 223 | SSSS: DIR2/outfiles 224 | genotypename: DIR2/autos_ccshad_fakes 225 | eglistname: DIR2/eurlist 226 | output: eurout 227 | */ 228 | getint (ph, "packmode:", &packmode); // controls internals 229 | 230 | getstring (ph, "genotypename:", &genotypename); 231 | getstring (ph, "genooutfilename:", &genooutfilename); 232 | getstring (ph, "indoutfilename:", &indoutfilename); 233 | getstring (ph, "snpname:", &snpname); 234 | getstring (ph, "indivname:", &indivname); 235 | getstring (ph, "output:", &outputname); 236 | getstring (ph, "badsnpname:", &badsnpname); 237 | getstring (ph, "goodsnpname:", &goodsnpname); 238 | getstring (ph, "badpairsname:", &badpairsname); 239 | getstring (ph, "markername:", &markername); 240 | getstring (ph, "idname:", &idname); 241 | getdbl (ph, "fakespacing:", &fakespacing); 242 | getint (ph, "familynames:", &familynames); 243 | writepars (ph); 244 | closepars (ph); 245 | 246 | } 247 | 248 | void 249 | dophyscheck (SNP ** snpm, int numsnps) 250 | { 251 | // catch places where physpos genpos are in opposite order 252 | SNP *cupt, *cuptold; 253 | int i; 254 | 255 | for (i = 0; i < numsnps; i++) { 256 | cupt = snpm[i]; 257 | if (i == 0) 258 | cuptold = cupt; 259 | if (cupt->isfake) 260 | continue; 261 | if (cupt->ignore) 262 | continue; 263 | if (cupt->chrom == cuptold->chrom) { 264 | if (cupt->physpos < cuptold->physpos) { 265 | printf ("physcheck %20s %15s %12.3f %12.3f %13.0f %13.0f\n", 266 | cuptold->ID, cupt->ID, 267 | cuptold->genpos, cupt->genpos, 268 | cuptold->physpos, cupt->physpos); 269 | } 270 | } 271 | cuptold = cupt; 272 | } 273 | } 274 | --------------------------------------------------------------------------------