├── CONVERTF
    ├── example.pheno
    ├── example.bed
    ├── example.eigenstratgeno
    ├── example.packedancestrymap
    ├── example.packedancestrymapgeno
    ├── example.pedind
    ├── example.ind
    ├── example.ind.packedancestrymap
    ├── par.EIGENSTRAT.PED
    ├── par.ANCESTRYMAP.EIGENSTRAT
    ├── par.PACKEDANCESTRYMAP.ANCESTRYMAP
    ├── example.map
    ├── par.PED.PACKEDPED
    ├── example.ped
    ├── par.PED.EIGENSTRAT
    ├── example.pedsnp
    ├── par.PACKEDPED.PACKEDANCESTRYMAP
    ├── example.perl
    ├── example.snp
    ├── example.snp.packedancestrymap
    ├── ind2pheno.perl
    └── example.ancestrymapgeno
├── POPGEN
    ├── HGDP.X.perl
    ├── elldemo
    │   ├── ctable
    │   ├── ebalist
    │   ├── test4
    │   ├── test3
    │   ├── aaa.pdf
    │   ├── sicaa.phyl
    │   ├── doplot
    │   ├── aaa:Sicily_EBA
    │   ├── par1
    │   ├── aaa:Lithuanian
    │   ├── q1
    │   ├── q2
    │   ├── q3
    │   ├── aaa.xtxt
    │   ├── q0
    │   ├── aaa:Georgian
    │   ├── aaa:Sardinian
    │   ├── rescale_ell
    │   └── sicaa.evec
    ├── example.geno
    ├── grmjunk.id
    ├── lsqproject.pdf
    ├── example.plot.pdf
    ├── example.eval
    ├── example.pca
    ├── example.ind
    ├── twexample.perl
    ├── example.plot2.xtxt
    ├── example.plot.xtxt
    ├── grmjunk
    ├── par.example
    ├── example.evec
    ├── example.snp
    ├── example.perl
    ├── example.log
    ├── twexample.eval
    ├── smartpca.info
    └── twtable
├── EIGENSTRAT
    ├── example.pheno
    ├── example.geno
    ├── example.plot.pdf
    ├── example.eval
    ├── example.pca
    ├── example.chisq
    ├── example.QTL.chisq
    ├── example.ind
    ├── example.QTL.chisq.GC
    ├── example.chisq.GC
    ├── example.QTL.ind
    ├── example.log
    ├── example.chisq.par
    ├── example.QTL.chisq.par
    ├── example.plot.xtxt
    ├── example.pca.par
    ├── example.pca.evec
    ├── example.snp
    ├── example.oldstyle.perl
    ├── example.perl
    └── example.QTL.perl
├── src
    ├── .gitignore
    ├── eigensrc
    │   ├── .gitignore
    │   ├── pcatoy.c
    │   ├── smartsubs.c
    │   ├── exclude.c
    │   ├── twstats.c
    │   ├── eigsubs.c
    │   └── eigx.c
    ├── nicksrc
    │   ├── Makefile
    │   ├── gauss.c
    │   ├── LICENSE.txt
    │   └── sortit.c
    ├── ksrc
    │   ├── Makefile
    │   ├── kjg_fpca.c
    │   └── kjg_gsl.c
    ├── gval.h
    ├── README
    ├── LICENSE.txt
    ├── Makefile
    ├── egsubs.c
    ├── h2d.c
    ├── gval.c
    ├── twsubs.c
    ├── smarttables
    │   └── twtable
    └── baseprog.c
├── include
    ├── globals.h
    ├── packit.h
    ├── smartsubs.h
    ├── nicklib.h
    ├── gval.h
    ├── egsubs.h
    ├── eigsubs.h
    ├── badpairs.h
    ├── exclude.h
    ├── workqueue.h
    ├── ldsubs.h
    ├── xsearch.h
    ├── sortit.h
    ├── getpars.h
    ├── regsubs.h
    ├── mcmcpars.h
    ├── xpsubs.h
    ├── not-thread-h
    ├── kjg_fpca.h
    ├── linsubs.h
    ├── ranmath.h
    ├── kjg_gsl.h
    ├── strsubs.h
    ├── statsubs.h
    ├── admutils.h
    └── mcio.h
├── bin
    ├── .gitignore
    ├── evec2pca.perl
    ├── gc.perl
    ├── rescale_ell
    ├── smarteigenstrat.perl
    ├── evec2pca-ped.perl
    ├── smartpca.perl
    └── ploteig
├── .gitignore
├── LICENSE.txt
└── README


/CONVERTF/example.pheno:
--------------------------------------------------------------------------------
1 | 11000
2 | 


--------------------------------------------------------------------------------
/POPGEN/HGDP.X.perl:
--------------------------------------------------------------------------------
1 | #!/usr/bin
2 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.pheno:
--------------------------------------------------------------------------------
1 | 11000
2 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/ctable:
--------------------------------------------------------------------------------
1 | Sicily_EBA   black
2 | base  gray70
3 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/ebalist:
--------------------------------------------------------------------------------
1 | I3122 
2 | I11442 
3 | I7796 
4 | I7800 
5 | I7807 
6 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/test4:
--------------------------------------------------------------------------------
1 | Lithuanian
2 | Georgian
3 | Sardinian
4 | Sicily_EBA
5 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/test3:
--------------------------------------------------------------------------------
1 | Lithuanian
2 | Georgian
3 | Sardinian
4 | ##Sicily_EBA
5 | 


--------------------------------------------------------------------------------
/CONVERTF/example.bed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DReichLab/EIG/HEAD/CONVERTF/example.bed


--------------------------------------------------------------------------------
/EIGENSTRAT/example.geno:
--------------------------------------------------------------------------------
1 | 11100
2 | 01212
3 | 21101
4 | 00122
5 | 21100
6 | 00111
7 | 22110
8 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/aaa.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DReichLab/EIG/HEAD/POPGEN/elldemo/aaa.pdf


--------------------------------------------------------------------------------
/POPGEN/example.geno:
--------------------------------------------------------------------------------
1 | 11100
2 | 01212
3 | 21101
4 | 00122
5 | 21100
6 | 00111
7 | 22110
8 | 


--------------------------------------------------------------------------------
/POPGEN/grmjunk.id:
--------------------------------------------------------------------------------
1 | NA	SAMPLE0
2 | NA	SAMPLE1
3 | NA	SAMPLE2
4 | NA	SAMPLE3
5 | NA	SAMPLE4
6 | 


--------------------------------------------------------------------------------
/POPGEN/lsqproject.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DReichLab/EIG/HEAD/POPGEN/lsqproject.pdf


--------------------------------------------------------------------------------
/POPGEN/example.plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DReichLab/EIG/HEAD/POPGEN/example.plot.pdf


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | baseprog
2 | convertf
3 | mergeit
4 | pca
5 | smshrink
6 | smartpca
7 | testpca
8 | 


--------------------------------------------------------------------------------
/CONVERTF/example.eigenstratgeno:
--------------------------------------------------------------------------------
1 | 11100
2 | 01212
3 | 21101
4 | 00122
5 | 21100
6 | 00111
7 | 22110
8 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.plot.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DReichLab/EIG/HEAD/EIGENSTRAT/example.plot.pdf


--------------------------------------------------------------------------------
/EIGENSTRAT/example.eval:
--------------------------------------------------------------------------------
1 |     3.145365
2 |     0.479177
3 |     0.279889
4 |     0.095570
5 |    -0.000000
6 | 


--------------------------------------------------------------------------------
/POPGEN/example.eval:
--------------------------------------------------------------------------------
1 |     3.145365
2 |     0.479177
3 |     0.279889
4 |     0.095570
5 |    -0.000000
6 | 


--------------------------------------------------------------------------------
/CONVERTF/example.packedancestrymap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DReichLab/EIG/HEAD/CONVERTF/example.packedancestrymap


--------------------------------------------------------------------------------
/CONVERTF/example.packedancestrymapgeno:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DReichLab/EIG/HEAD/CONVERTF/example.packedancestrymapgeno


--------------------------------------------------------------------------------
/src/eigensrc/.gitignore:
--------------------------------------------------------------------------------
1 | eigenstrat
2 | eigenstratQTL
3 | pcatoy
4 | smarteigenstrat
5 | smartpca
6 | smartrel
7 | twstats
8 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/sicaa.phyl:
--------------------------------------------------------------------------------
1 |      3
2 | Lithuanian 0.000 0.020 0.021
3 |   Georgian 0.020 0.000 0.018
4 |  Sardinian 0.021 0.018 0.000
5 | 


--------------------------------------------------------------------------------
/POPGEN/example.pca:
--------------------------------------------------------------------------------
1 | 2
2 | 3.1450
3 | 0.4790
4 |   0.6502  0.0875
5 |   0.3589  0.0804
6 |  -0.0781 -0.6017
7 |  -0.3982  0.7316
8 |  -0.5328 -0.2977
9 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.pca:
--------------------------------------------------------------------------------
1 | 2
2 | 3.1450
3 | 0.4790
4 |   0.6502  0.0875
5 |   0.3589  0.0804
6 |  -0.0781 -0.6017
7 |  -0.3982  0.7316
8 |  -0.5328 -0.2977
9 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.chisq:
--------------------------------------------------------------------------------
1 | Chisq EIGENSTRAT
2 | 2.2222 0.9615
3 | 2.9167 0.2219
4 | 2.0833 0.0612
5 | 4.1667 0.1258
6 | 2.9167 2.0997
7 | 5.0000 3.0000
8 | 3.8095 0.0764
9 | 


--------------------------------------------------------------------------------
/include/globals.h:
--------------------------------------------------------------------------------
1 | #ifndef _GLOBALS_
2 | #define _GLOBALS_
3 | 
4 | int numchrom = 22;
5 | int fancynorm = YES, verbose = NO, plotmode = NO, outnum = -1;
6 | 
7 | #endif
8 | 


--------------------------------------------------------------------------------
/include/packit.h:
--------------------------------------------------------------------------------
1 | int packmode = NO;
2 | unsigned char *packgenos = NULL;
3 | long packlen = 0;
4 | long rlen = -1;
5 | int rdismode = NO;
6 | unsigned char *packepath;
7 | 


--------------------------------------------------------------------------------
/include/smartsubs.h:
--------------------------------------------------------------------------------
1 | void setoutliermode (int mode);
2 | int
3 | ridoutlier (double *evecs, int n, int neigs,
4 | 	    double thresh, int *badlist, OUTLINFO ** outinfo);
5 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.QTL.chisq:
--------------------------------------------------------------------------------
1 | Chisq EIGENSTRAT
2 | 2.2222 0.9615
3 | 2.9167 0.2219
4 | 2.0833 0.0612
5 | 4.1667 0.1258
6 | 2.9167 2.0997
7 | 5.0000 3.0000
8 | 3.8095 0.0764
9 | 


--------------------------------------------------------------------------------
/CONVERTF/example.pedind:
--------------------------------------------------------------------------------
1 |      1      SAMPLE0 0 0 2 2
2 |      2      SAMPLE1 0 0 1 2
3 |      3      SAMPLE2 0 0 2 1
4 |      4      SAMPLE3 0 0 1 1
5 |      5      SAMPLE4 0 0 2 1
6 | 


--------------------------------------------------------------------------------
/bin/.gitignore:
--------------------------------------------------------------------------------
 1 | baseprog
 2 | convertf
 3 | eigenstrat
 4 | eigenstratQTL
 5 | mergeit
 6 | pca
 7 | pcatoy
 8 | smarteigenstrat
 9 | smartpca
10 | smartrel
11 | smshrink
12 | twstats
13 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/doplot:
--------------------------------------------------------------------------------
1 | #!/usr//bin/perl  -w 
2 | 
3 | system "ploteig -i sicaa.evec -c 1:2  -g aaa  -p  test4 -x -k  -r ctable -e ell4aa.out -t \" Sicily EBA projected conf: 0.95\"" ;
4 | 
5 | 


--------------------------------------------------------------------------------
/include/nicklib.h:
--------------------------------------------------------------------------------
1 | #include <vsubs.h>
2 | #include <strsubs.h>
3 | #include <sortit.h>
4 | #include <statsubs.h>
5 | #include <linsubs.h>  
6 | #include <ranmath.h>
7 | #include <xsearch.h>
8 | 
9 | 


--------------------------------------------------------------------------------
/CONVERTF/example.ind:
--------------------------------------------------------------------------------
1 | 	     SAMPLE0 F       Case
2 |              SAMPLE1 M       Case
3 |              SAMPLE2 F    Control
4 |              SAMPLE3 M    Control
5 |              SAMPLE4 F    Control
6 | 


--------------------------------------------------------------------------------
/POPGEN/example.ind:
--------------------------------------------------------------------------------
1 |              SAMPLE0 F       Case
2 |              SAMPLE1 M       Case
3 |              SAMPLE2 F    Control
4 |              SAMPLE3 M    Control
5 |              SAMPLE4 F    Control
6 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.ind:
--------------------------------------------------------------------------------
1 |              SAMPLE0 F       Case
2 |              SAMPLE1 M       Case
3 |              SAMPLE2 F    Control
4 |              SAMPLE3 M    Control
5 |              SAMPLE4 F    Control
6 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.QTL.chisq.GC:
--------------------------------------------------------------------------------
 1 | Chisq EIGENSTRAT
 2 | lambda=6.396 lambda=1.000
 3 | 0.3475 0.9615
 4 | 0.4560 0.2219
 5 | 0.3257 0.0612
 6 | 0.6515 0.1258
 7 | 0.4560 2.0997
 8 | 0.7818 3.0000
 9 | 0.5956 0.0764
10 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.chisq.GC:
--------------------------------------------------------------------------------
 1 | Chisq EIGENSTRAT
 2 | lambda=6.396 lambda=1.000
 3 | 0.3475 0.9615
 4 | 0.4560 0.2219
 5 | 0.3257 0.0612
 6 | 0.6515 0.1258
 7 | 0.4560 2.0997
 8 | 0.7818 3.0000
 9 | 0.5956 0.0764
10 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.QTL.ind:
--------------------------------------------------------------------------------
1 |              SAMPLE0 F    0.999999
2 |              SAMPLE1 M    1.000001
3 |              SAMPLE2 F   -0.000001
4 |              SAMPLE3 M    0.000001
5 |              SAMPLE4 F   -0.000001
6 | 


--------------------------------------------------------------------------------
/CONVERTF/example.ind.packedancestrymap:
--------------------------------------------------------------------------------
1 |              SAMPLE0 F       Case
2 |              SAMPLE1 M       Case
3 |              SAMPLE2 F    Control
4 |              SAMPLE3 M    Control
5 |              SAMPLE4 F    Control
6 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.log:
--------------------------------------------------------------------------------
1 | parameter file: example.chisq.par
2 | genotypename: example.geno
3 | snpname: example.snp
4 | indivname: example.ind
5 | pcaname: example.pca
6 | outputname: example.chisq
7 | numpc: 1
8 | qtmode: NO
9 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.chisq.par:
--------------------------------------------------------------------------------
1 | genotypename:  example.geno
2 | snpname:       example.snp
3 | indivname:     example.ind
4 | pcaname:       example.pca
5 | outputname:    example.chisq
6 | numpc:         1
7 | qtmode:        NO
8 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.QTL.chisq.par:
--------------------------------------------------------------------------------
1 | genotypename:  example.geno
2 | snpname:       example.snp
3 | indivname:     example.QTL.ind
4 | pcaname:       example.pca
5 | outputname:    example.QTL.chisq
6 | numpc:         1
7 | qtmode:        YES
8 | 


--------------------------------------------------------------------------------
/POPGEN/twexample.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | $command = "../bin/twstats";
 4 | $command .= " -t twtable ";
 5 | $command .= " -i twexample.eval ";
 6 | $command .= " -o twexample.out";
 7 | print("$command\n");
 8 | system("$command");
 9 | 
10 | 


--------------------------------------------------------------------------------
/CONVERTF/par.EIGENSTRAT.PED:
--------------------------------------------------------------------------------
1 | genotypename:    example.eigenstratgeno
2 | snpname:         example.snp
3 | indivname:       example.ind
4 | outputformat:    PED
5 | genotypeoutname: example.ped
6 | snpoutname:      example.pedsnp
7 | indivoutname:    example.pedind
8 | 


--------------------------------------------------------------------------------
/include/gval.h:
--------------------------------------------------------------------------------
1 | void setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs,
2 | 	      int *xindex, int *xtypes, int ncols);
3 | void unsetgval ();
4 | int getgval (int row, int col, double *val);
5 | int getggval (int indindx, int col, double *val);
6 | 


--------------------------------------------------------------------------------
/POPGEN/example.plot2.xtxt:
--------------------------------------------------------------------------------
1 | set terminal postscript color
2 | set title  "" 
3 | set xlabel  "eigenvector 1" 
4 | set ylabel  "eigenvector 2" 
5 | plot  "example.evec.1:2:Case" using 2:3 title "Case" , \
6 |  "example.evec.1:2:Control" using 2:3 title "Control" 
7 | ## pause 9999
8 | 


--------------------------------------------------------------------------------
/CONVERTF/par.ANCESTRYMAP.EIGENSTRAT:
--------------------------------------------------------------------------------
1 | genotypename:    example.ancestrymapgeno
2 | snpname:         example.snp
3 | indivname:       example.ind
4 | outputformat:    EIGENSTRAT
5 | genotypeoutname: example.eigenstratgeno
6 | snpoutname:      example.snp
7 | indivoutname:    example.ind
8 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.plot.xtxt:
--------------------------------------------------------------------------------
1 | set terminal postscript color
2 | set title  "" 
3 | set xlabel  "eigenvector 1" 
4 | set ylabel  "eigenvector 2" 
5 | plot  "example.pca.evec.1:2:Case" using 2:3 title "Case" , \
6 |  "example.pca.evec.1:2:Control" using 2:3 title "Control" 
7 | ## pause 9999
8 | 


--------------------------------------------------------------------------------
/CONVERTF/par.PACKEDANCESTRYMAP.ANCESTRYMAP:
--------------------------------------------------------------------------------
1 | genotypename:    example.packedancestrymapgeno
2 | snpname:         example.snp
3 | indivname:       example.ind
4 | outputformat:    ANCESTRYMAP
5 | genotypeoutname: example.ancestrymapgeno
6 | snpoutname:      example.snp
7 | indivoutname:    example.ind
8 | 


--------------------------------------------------------------------------------
/POPGEN/example.plot.xtxt:
--------------------------------------------------------------------------------
1 | set terminal postscript color
2 | set title  "" 
3 | set key outside
4 | set xlabel  "eigenvector 1" 
5 | set ylabel  "eigenvector 2" 
6 | plot  "example.evec.1:2:Case" using 2:3 title "Case" , \
7 |  "example.evec.1:2:Control" using 2:3 title "Control" 
8 | ## pause 9999
9 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.pca.par:
--------------------------------------------------------------------------------
 1 | genotypename: example.geno
 2 | snpname: example.snp
 3 | indivname: example.ind
 4 | evecoutname: example.pca.evec
 5 | evaloutname: example.eval
 6 | altnormstyle: NO
 7 | numoutevec: 2
 8 | numoutlieriter: 5
 9 | numoutlierevec: 2
10 | outliersigmathresh: 6.0
11 | qtmode: 0
12 | 


--------------------------------------------------------------------------------
/POPGEN/grmjunk:
--------------------------------------------------------------------------------
 1 | 1 1 7 1.789380
 2 | 2 1 7 0.789216
 3 | 2 2 7 0.670580
 4 | 3 1 7 -0.286950
 5 | 3 2 7 -0.108997
 6 | 3 3 7 0.317350
 7 | 4 1 7 -0.993409
 8 | 4 2 7 -0.527105
 9 | 4 3 7 -0.108997
10 | 4 4 7 0.958931
11 | 5 1 7 -1.298237
12 | 5 2 7 -0.823694
13 | 5 3 7 0.187593
14 | 5 4 7 0.670580
15 | 5 5 7 1.263758
16 | 


--------------------------------------------------------------------------------
/src/nicksrc/Makefile:
--------------------------------------------------------------------------------
 1 | override CFLAGS += -c -O3   -g -p -Wimplicit -I../../include
 2 | 
 3 | all: libnick.a
 4 | 
 5 | libnick.a: strsubs.o sortit.o vsubs.o statsubs.o linsubs.o getpars.o xsearch.o gauss.o gds.o
 6 | 	ar -r libnick.a $^
 7 | 	ranlib libnick.a
 8 | 
 9 | clean:
10 | 	rm -f *.o
11 | 	rm -f libnick.a
12 | 


--------------------------------------------------------------------------------
/POPGEN/par.example:
--------------------------------------------------------------------------------
 1 | genotypename:    ../CONVERTF/example.ped
 2 | snpname:         ../CONVERTF/example.map
 3 | indivname:       ../CONVERTF/example.ped
 4 | evecoutname:     example.evec
 5 | evaloutname:     example.eval
 6 | altnormstyle:    NO 
 7 | numoutevec:      2
 8 | familynames:     NO
 9 | grmoutname:      grmjunk
10 | 


--------------------------------------------------------------------------------
/src/ksrc/Makefile:
--------------------------------------------------------------------------------
 1 | CFLAGS += -I../../include -I/opt/openblas/include
 2 | 
 3 | ifeq ($(DEBUG), 1)
 4 | 	CFLAGS += -g # enable debugging
 5 | endif
 6 | 
 7 | ifeq ($(PROFILING), 1)
 8 | 	CFLAGS += -pg # enable profiling
 9 | endif
10 | 
11 | .PHONY: all clean
12 | 
13 | all: kjg_gsl.o kjg_fpca.o
14 | 
15 | clean:
16 | 	rm *.o
17 | 


--------------------------------------------------------------------------------
/CONVERTF/example.map:
--------------------------------------------------------------------------------
1 | 11       rs0000     0.000000            0
2 | 11       rs1111     0.001000       100000
3 | 11       rs2222     0.002000       200000
4 | 11       rs3333     0.003000       300000
5 | 11       rs4444     0.004000       400000
6 | 11       rs5555     0.005000       500000
7 | 11       rs6666     0.006000       600000
8 | 


--------------------------------------------------------------------------------
/CONVERTF/par.PED.PACKEDPED:
--------------------------------------------------------------------------------
1 | genotypename:    example.ped
2 | snpname:         example.pedsnp # or example.map, either works
3 | indivname:       example.pedind # or example.ped, either works
4 | outputformat:    PACKEDPED
5 | genotypeoutname: example.bed
6 | snpoutname:      example.pedsnp
7 | indivoutname:    example.pedind
8 | familynames:     NO
9 | 


--------------------------------------------------------------------------------
/CONVERTF/example.ped:
--------------------------------------------------------------------------------
1 |      1      SAMPLE0 0 0 2 2  1 2  3 3  1 1  1 1  3 3  1 1  3 3
2 |      2      SAMPLE1 0 0 1 2  1 2  1 3  1 4  1 1  1 3  1 1  3 3
3 |      3      SAMPLE2 0 0 2 1  1 2  1 1  1 4  1 2  1 3  1 4  3 4
4 |      4      SAMPLE3 0 0 1 1  2 2  1 3  4 4  2 2  1 1  1 4  3 4
5 |      5      SAMPLE4 0 0 2 1  2 2  1 1  1 4  2 2  1 1  1 4  4 4
6 | 


--------------------------------------------------------------------------------
/CONVERTF/par.PED.EIGENSTRAT:
--------------------------------------------------------------------------------
1 | genotypename:    example.ped
2 | snpname:         example.pedsnp # or example.map, either works 
3 | indivname:       example.pedind # or example.ped, either works
4 | outputformat:    EIGENSTRAT
5 | genotypeoutname: example.eigenstratgeno
6 | snpoutname:      example.snp
7 | indivoutname:    example.ind
8 | familynames:     NO
9 | 


--------------------------------------------------------------------------------
/CONVERTF/example.pedsnp:
--------------------------------------------------------------------------------
1 | 11       rs0000     0.000000            0 A C
2 | 11       rs1111     0.001000       100000 A G
3 | 11       rs2222     0.002000       200000 A T
4 | 11       rs3333     0.003000       300000 C A
5 | 11       rs4444     0.004000       400000 G A
6 | 11       rs5555     0.005000       500000 T A
7 | 11       rs6666     0.006000       600000 G T
8 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/aaa:Sicily_EBA:
--------------------------------------------------------------------------------
1 |                I3122    -0.094106     -0.019738       Sicily_EBA
2 |               I11442    -0.061346     -0.019553       Sicily_EBA
3 |                I7796    -0.046874     -0.045908       Sicily_EBA
4 |                I7800    -0.066847     -0.032000       Sicily_EBA
5 |                I7807    -0.073119     -0.032957       Sicily_EBA
6 | 


--------------------------------------------------------------------------------
/CONVERTF/par.PACKEDPED.PACKEDANCESTRYMAP:
--------------------------------------------------------------------------------
1 | genotypename:    example.bed
2 | snpname:         example.pedsnp # or example.map, either works
3 | indivname:       example.pedind # or example.ped, either works
4 | outputformat:    PACKEDANCESTRYMAP
5 | genotypeoutname: example.packedancestrymapgeno
6 | snpoutname:      example.snp
7 | indivoutname:    example.ind
8 | familynames:     NO
9 | 


--------------------------------------------------------------------------------
/POPGEN/example.evec:
--------------------------------------------------------------------------------
1 |            #eigvals:     3.145     0.479 
2 |              SAMPLE0     0.6502      0.0875             Case
3 |              SAMPLE1     0.3589      0.0804             Case
4 |              SAMPLE2    -0.0781     -0.6017          Control
5 |              SAMPLE3    -0.3982      0.7316          Control
6 |              SAMPLE4    -0.5328     -0.2977          Control
7 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.pca.evec:
--------------------------------------------------------------------------------
1 |            #eigvals:     3.145     0.479 
2 |              SAMPLE0     0.6502      0.0875             Case
3 |              SAMPLE1     0.3589      0.0804             Case
4 |              SAMPLE2    -0.0781     -0.6017          Control
5 |              SAMPLE3    -0.3982      0.7316          Control
6 |              SAMPLE4    -0.5328     -0.2977          Control
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Object files
 2 | *.o
 3 | *.ko
 4 | *.obj
 5 | *.elf
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Libraries
12 | *.lib
13 | *.a
14 | *.la
15 | *.lo
16 | 
17 | # Shared objects (inc. Windows DLLs)
18 | *.dll
19 | *.so
20 | *.so.*
21 | *.dylib
22 | 
23 | # Executables
24 | *.exe
25 | *.out
26 | *.app
27 | *.i*86
28 | *.x86_64
29 | *.hex
30 | 
31 | # Debug files
32 | *.dSYM/
33 | 


--------------------------------------------------------------------------------
/CONVERTF/example.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # $parfile = "par.ANCESTRYMAP.EIGENSTRAT";
 4 |   $parfile = "par.EIGENSTRAT.PED"; 
 5 | # $parfile = "par.PED.EIGENSTRAT";                    
 6 | # $parfile = "par.PED.PACKEDPED";                     
 7 | # $parfile = "par.PACKEDPED.PACKEDANCESTRYMAP";       
 8 | # $parfile = "par.PACKEDANCESTRYMAP.ANCESTRYMAP";     
 9 | 
10 | system("../bin/convertf -p $parfile");
11 | 


--------------------------------------------------------------------------------
/POPGEN/example.snp:
--------------------------------------------------------------------------------
1 |               rs0000  11        0.000000               0 A C
2 |               rs1111  11        0.001000          100000 A G
3 |               rs2222  11        0.002000          200000 A T
4 |               rs3333  11        0.003000          300000 C A
5 |               rs4444  11        0.004000          400000 G A
6 |               rs5555  11        0.005000          500000 T A
7 |               rs6666  11        0.006000          600000 G T
8 | 


--------------------------------------------------------------------------------
/CONVERTF/example.snp:
--------------------------------------------------------------------------------
1 |               rs0000  11        0.000000               0 A C
2 |               rs1111  11        0.001000          100000 A G
3 |               rs2222  11        0.002000          200000 A T
4 |               rs3333  11        0.003000          300000 C A
5 |               rs4444  11        0.004000          400000 G A
6 |               rs5555  11        0.005000          500000 T A
7 |               rs6666  11        0.006000          600000 G T
8 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.snp:
--------------------------------------------------------------------------------
1 |               rs0000  11        0.000000               0 A C
2 |               rs1111  11        0.001000          100000 A G
3 |               rs2222  11        0.002000          200000 A T
4 |               rs3333  11        0.003000          300000 C A
5 |               rs4444  11        0.004000          400000 G A
6 |               rs5555  11        0.005000          500000 T A
7 |               rs6666  11        0.006000          600000 G T
8 | 


--------------------------------------------------------------------------------
/include/egsubs.h:
--------------------------------------------------------------------------------
 1 | #include "admutils.h"
 2 | 
 3 | 
 4 | int makeeglist (char **eglist, int maxnumeg, Indiv ** indivmarkers,
 5 | 		int numindivs);
 6 | int mkeglist (Indiv ** indm, int numindivs, char **eglist);
 7 | void seteglist (Indiv ** indm, int nindiv, char *eglistname);
 8 | void seteglistv (Indiv ** indm, int nindiv, char *eglistname, int val);
 9 | int loadlist (char **list, char *listname);
10 | int loadlist_type (char **list, char *listname, int *ztypes, int off);
11 | 


--------------------------------------------------------------------------------
/CONVERTF/example.snp.packedancestrymap:
--------------------------------------------------------------------------------
1 |               rs0000  11        0.000000               0 A C
2 |               rs1111  11        0.001000          100000 A G
3 |               rs2222  11        0.002000          200000 A T
4 |               rs3333  11        0.003000          300000 C A
5 |               rs4444  11        0.004000          400000 G A
6 |               rs5555  11        0.005000          500000 T A
7 |               rs6666  11        0.006000          600000 G T
8 | 


--------------------------------------------------------------------------------
/CONVERTF/ind2pheno.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | $in = $ARGV[0]; # .ind file
 4 | $out = $ARGV[1]; # .pheno file
 5 | 
 6 | open(IN,$in) || die("COF");
 7 | open(OUT,">$out") || die("COF");
 8 | 
 9 | while($line = <IN>)
10 | {
11 |   if($line =~ /Case/) { print OUT ("1"); $case=1; }
12 |   elsif($line =~ /Control/) { print OUT ("0"); $control=1; }
13 |   else { print OUT ("9"); }
14 | }
15 | print OUT ("\n");
16 | unless($case) { print("WARNING: no cases\n"); }
17 | unless($control) { print("WARNING: no controls\n"); }
18 | 
19 | 


--------------------------------------------------------------------------------
/src/gval.h:
--------------------------------------------------------------------------------
 1 | void setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs,
 2 | 	      int *xindex, int *xtypes, int ncols);
 3 | void unsetgval ();
 4 | int getgval (int row, int col, double *val);
 5 | int getggval (int indindx, int col, double *val);
 6 | 
 7 | void set_ind_mask ();
 8 | 
 9 | size_t get_nrows ();
10 | size_t get_ncols ();
11 | 
12 | void kjg_geno_get_normalized_row (const size_t snp_index, double *y);
13 | size_t kjg_geno_get_normalized_rows (const size_t i, const size_t r,
14 | 				     double *Y);
15 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/par1:
--------------------------------------------------------------------------------
 1 | DIR:  /home/np29/broaddatax/v37/
 2 | ##DIR:    . 
 3 | S1:              test4     
 4 | indivname:       DIR/S1.ind  
 5 | snpname:         DIR/S1.snp       
 6 | genotypename:    DIR/S1.geno   
 7 | numoutliter:     0 
 8 | numoutevec:      2 
 9 | evecoutname:     sicaa.evec
10 | phylipname:      sicaa.phyl
11 | poplistname:     test3    
12 | inbreed:         NO 
13 | lsqproject:      YES 
14 | shrinkmode:      YES 
15 | hiprec:          YES
16 | elllistname:     ebalist
17 | elloutname:      ell4aa.out
18 | ellconf:         0.95 
19 | 


--------------------------------------------------------------------------------
/include/eigsubs.h:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <limits.h>
 3 | #include <math.h>
 4 | #include <nicklib.h>
 5 | 
 6 | void eigvals (double *mat, double *evals, int n);
 7 | void eigvecs (double *mat, double *evals, double *evecs, int n);
 8 | void mkorth(double *orth, double *ww, int n) ;
 9 | void eigb (double *lam, double *a, double *b, int n);
10 | void eigc (double *lam, double *a, double *b, int n);
11 | double twestxx (double *lam, int m, double *pzn, double *pzvar);
12 | 
13 | typedef struct
14 | {
15 |   int vecno;
16 |   double score;
17 | } OUTLINFO;;
18 | 


--------------------------------------------------------------------------------
/include/badpairs.h:
--------------------------------------------------------------------------------
 1 | #include <nicklib.h>
 2 | #include "admutils.h"
 3 | 
 4 | void dobadpairs (char *badpairsname, SNP ** snpm, int numsnps);
 5 | void dogood (char *goodsnpname, SNP ** snpm, int numsnps);
 6 | void getsnpsc (char *snpscname, SNP ** snpm, int numsnps);
 7 | int killsnps (Indiv ** indivmarkers, SNP ** snpmarkers, int numsnps,
 8 | 	      int mincasenum);
 9 | void loadbadpsc (SNP ** snpm, int numsnps, int rmode, char *gname);
10 | 
11 | double entrop (double *a, int n);
12 | double xxlog2 (double t);
13 | double mutx (double *dd);
14 | double mutxx (double *dd, int m, int n);
15 | 


--------------------------------------------------------------------------------
/POPGEN/example.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}";
 4 | 
 5 | $command = "smartpca";
 6 | $command .= " -p par.example >example.log";
 7 | print("$command\n");
 8 | system("$command");
 9 | 
10 | $command = "ploteig";
11 | $command .= " -i example.evec ";
12 | $command .= " -c 1:2 ";
13 | $command .= " -p Case:Control ";
14 | $command .= " -x ";
15 | $command .= " -o example.plot.xtxt "; # must end in .xtxt
16 | print("$command\n");
17 | system("$command");
18 | 
19 | $command = "evec2pca.perl 2 example.evec example.ind example.pca";
20 | print("$command\n");
21 | system("$command");
22 | 


--------------------------------------------------------------------------------
/include/exclude.h:
--------------------------------------------------------------------------------
 1 | #ifndef _EXCLUDE_
 2 | #define _EXCLUDE_
 3 | 
 4 | #include <admutils.h>
 5 | #include <mcio.h>
 6 | #include <stdio.h>
 7 | 
 8 | /*  file name parameter : xregionname */
 9 | /*  HW filter parameter : nhwfilter (-1 means no-filter) */
10 | /*  maximum number of regions : 1000
11 |     closed intervals in physical position include endpoints */
12 | /*  read file and set ignore flag for SNPs */
13 | void excluderegions (char *xregionname, SNP ** snps, int nsnps,
14 | 		     char *deletesnpoutname);
15 | void hwfilter (SNP ** snps, int nsnps, int nindiv, double nhwfilter,
16 | 	       char *deletesnpoutname);
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/src/README:
--------------------------------------------------------------------------------
 1 | Partial list of Authors of eigensoft
 2 | 
 3 | Nick Patterson
 4 | Alkes Price
 5 | Sam Pollack 
 6 | Sasha Gusev
 7 | Chris Chang 
 8 | Kevin Galinsky
 9 | 
10 | ========================================================
11 | fastmode:  YES  
12 | optional parameters 
13 |  fastdim:   (default 2 * numeigs)
14 |  fastiter:  (default  numeigs)
15 | 
16 | In fastmode 
17 |  A .evec and .eval file wiil be produced on request but:
18 | 
19 | NO outlier removal
20 | NO Tracy-Widom significance testing
21 | NO lsqproject mode 
22 | NO F_st.   (Use fstonly: YES) if this is wanted.
23 | 
24 | If fastmode and easymode set, NO projection
25 | 
26 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/aaa:Lithuanian:
--------------------------------------------------------------------------------
 1 |         LithuanianF1     0.030379      0.322035       Lithuanian
 2 |           lithuania3     0.031552      0.282867       Lithuanian
 3 |          lithuania10     0.023618      0.342455       Lithuanian
 4 |           lithuania9     0.032949      0.338347       Lithuanian
 5 |         LithuanianA1     0.023348      0.316050       Lithuanian
 6 |         LithuanianE2     0.031801      0.273176       Lithuanian
 7 |           lithuania1     0.015194      0.254946       Lithuanian
 8 |           lithuania8     0.030478      0.305058       Lithuanian
 9 |           lithuania2     0.032994      0.334543       Lithuanian
10 |         LithuanianD1     0.016299      0.282471       Lithuanian
11 | 


--------------------------------------------------------------------------------
/src/nicksrc/gauss.c:
--------------------------------------------------------------------------------
 1 | #include  "ranmath.h"
 2 | 
 3 | double
 4 | gauss ()
 5 | {
 6 | 
 7 | /** 
 8 |  Numer alg. in C pp 289 ff
 9 | */
10 | 
11 |   static int iset = 0;
12 |   static double gset;
13 |   double v1, v2, rsq, fac;
14 | 
15 |   if (iset == 1) {
16 |     iset = 0;
17 |     return gset;
18 |   }
19 | 
20 |   do {
21 |     v1 = 2.0 * DRAND2 () - 1.0;
22 |     v2 = 2.0 * DRAND2 () - 1.0;
23 |     rsq = v1 * v1 + v2 * v2;
24 |   } while (rsq >= 1.0 || rsq == 0.0);
25 | 
26 |   fac = sqrt (-2.0 * log (rsq) / rsq);
27 |   gset = v1 * fac;
28 |   iset = 1;
29 |   return v2 * fac;
30 | 
31 | }
32 | 
33 | void
34 | gaussa (double *a, int n)
35 | {
36 |   int i;
37 |   for (i = 0; i < n; i++)
38 |     a[i] = gauss ();
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/include/workqueue.h:
--------------------------------------------------------------------------------
 1 | #ifndef WORKQUEUE_H
 2 | #define WORKQUEUE_H
 3 | 
 4 | #ifdef HAVE_PTHREAD
 5 | #include <pthread.h>
 6 | #endif
 7 | 
 8 | typedef struct work_task_t
 9 | {
10 | #ifdef HAVE_PTHREAD
11 |   pthread_t thread;
12 | #endif
13 |   void *(*start_routine) (void *);
14 |   void *argument;
15 | } work_task;
16 | 
17 | typedef struct work_queue_t
18 | {
19 |   // Which tasks are pending?
20 |   work_task *tasks;
21 |   int num_tasks;
22 | } work_queue;
23 | 
24 | void create_work_queue (work_queue ** the_queue);
25 | void destroy_work_queue (work_queue ** the_queue);
26 | void queue_task (work_queue * queue, const work_task * task);
27 | void wait_for_queue_to_complete (const work_queue * queue);
28 | 
29 | #endif // WORKQUEUE_H
30 | 


--------------------------------------------------------------------------------
/include/ldsubs.h:
--------------------------------------------------------------------------------
 1 | 
 2 | typedef struct
 3 | {
 4 |   double S0;
 5 |   double S1;
 6 |   double S2;
 7 |   double S11;
 8 |   double S12;
 9 |   double S22;
10 |   double m1;
11 |   double m2;
12 |   double v11;
13 |   double v12;
14 |   double v22;
15 |   double corr;
16 |   double Z;
17 | } CORR;
18 | 
19 | int calccorr (CORR * corrpt, int mode, int ztrans);
20 | void printcorr (CORR * corrpt);
21 | void clearcorr (CORR * corrpt);
22 | void addcorr (CORR * corrpt, double x1, double x2);
23 | void addcorrn (CORR * corrpt, double x1, double x2, double yn);
24 | void minuscorr (CORR * out, CORR * c1, CORR * c2);
25 | 
26 | double lddip (double *xc);
27 | double zdip (double *xc);
28 | void setzdipmode (int mode);
29 | void setzdphasedmode (int mode);
30 | 


--------------------------------------------------------------------------------
/include/xsearch.h:
--------------------------------------------------------------------------------
 1 | #include <search.h>  
 2 | 
 3 | int xfindit(char *ss)   ; 
 4 | int  xloadsearchx(char **ss, int n)   ; 
 5 | int finddup(char **ss, int n) ;
 6 | void xloadsearch(char **ss, int n)   ; 
 7 | void xdestroy()  ;
 8 | 
 9 | void xhcreate (int n)  ;
10 | void xhdestroy()  ;
11 | ENTRY *xhsearch(ENTRY item, ACTION act)  ;
12 | 
13 | int xlookup(char *key, ACTION act)  ;
14 | int xhash (char *key)  ;
15 | int xhash1(int ww)  ;
16 | int xhash2 (int x)  ;
17 | int xcshift(int x, int shft)  ;
18 | int stringhash(char *key) ;
19 | void setfancyhash(int val) ;
20 | long xlhash (long x) ;  
21 | int xshash (int x) ;  
22 | int fnv_hash(char *strng) ;
23 | 
24 | #define FNV_PRIME           0x01000193
25 | #define FNV_OFFSET_BASIS    0x811c9dc5
26 | 
27 | 


--------------------------------------------------------------------------------
/include/sortit.h:
--------------------------------------------------------------------------------
 1 | 
 2 | void sortit(double *a, int *ind, int len)  ;
 3 | double median (double *a, int n) ;
 4 | int compit (int *a1, int *a2) ; 
 5 | void isortit(int *a, int *ind, int len)  ;
 6 | void lsortit(long *a, int *ind, int len)  ;
 7 | int icompit (int *a1, int *a2) ; 
 8 | int lcompit (int *a1, int *a2) ; 
 9 | void invperm(int *a, int *b, int n) ; 
10 | int ipcompit (int *a1, int *a2)  ;
11 | int comparr(double *a, double *b, int len)  ;
12 | int compiarr(int *a, int *b, int len)  ;
13 | int complarr(long *a, long *b, int len)  ;
14 | void ipsortit(int **a, int *ind, int len, int rlen)  ;
15 | void ipsortitp(int **a, int *ind, int len, int rlen, int *pp)  ;
16 | void setorder (int *pp, int rlen) ;
17 | void mkirank(int *rank, int *xin, int n) ;
18 | void mkrank(int *rank, double *xin, int n) ;
19 | 
20 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.oldstyle.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 
 4 | # MUST put pca bin directory in path for smartpca.perl to work
 5 | 
 6 | $command = "pca";
 7 | $command .= " -i example.geno ";
 8 | $command .= " -o example.pca ";
 9 | $command .= " -e example.eval ";
10 | $command .= " -l example.log ";
11 | $command .= " -k 2 ";
12 | $command .= " -m 5 ";
13 | $command .= " -t 2 ";
14 | $command .= " -s 6.0 ";
15 | print("$command\n");
16 | system("$command");
17 | 
18 | $command = "eigenstrat"; # or eigenstrat.big.perl for large data sets
19 | $command .= " -i example.geno ";
20 | $command .= " -j example.pheno ";
21 | $command .= " -p example.pca ";
22 | $command .= " -l 1 ";
23 | $command .= " -o example.chisq ";
24 | print("$command\n");
25 | system("$command");
26 | 
27 | $command = "gc.perl example.chisq example.chisq.GC";
28 | print("$command\n");
29 | system("$command");
30 | 


--------------------------------------------------------------------------------
/include/getpars.h:
--------------------------------------------------------------------------------
 1 | typedef struct {
 2 |  int numpars ;
 3 |  FILE *fx ;
 4 |  char **ppars ;
 5 |  char **pdata ;
 6 | }  phandle ;
 7 | 
 8 | void writepars(phandle  *pp) ;
 9 | void closepars(phandle  *pp) ;
10 | phandle *openpars(char *fname) ;
11 | 
12 | int getlongstring(phandle *pp, char *parname, char **kret)  ;
13 | // whole of line
14 | int getstring(phandle *pp, char *parname, char **kret)  ;
15 | int getint(phandle *pp, char *parname, int *kret)  ;
16 | int getints(phandle *pp, char *parname, int *aint, int nint) ;
17 | int getintss(phandle *pp, char *parname, int *aint, int *xint) ;
18 | 
19 | int getdbl(phandle *pp, char *parname, double *dbl)  ;
20 | int getdbls(phandle *pp, char *parname, double *dbl, int ndbl) ;
21 | int getdblss(phandle *pp, char *parname, double *dbl, int *ndbl) ;
22 | int subst(char *outstr, char *instr, char *ins, char *outs)  ;
23 | void dostrsub(phandle *pp)  ; 
24 | int upstring (char *ss)  ;
25 | void subcolon(char *ss)  ;
26 | 


--------------------------------------------------------------------------------
/include/regsubs.h:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <string.h>
 3 | #include <unistd.h>
 4 | #include <math.h>
 5 | #include <nicklib.h>
 6 | 
 7 | double regressit (double *ans, double *eq, double *rhs, int m, int n);
 8 | void regressitall (char **vname, double *eq, double *rhs, int m, int n);
 9 | void add1 (int *a, int *b, int n);
10 | 
11 | void ptoz (double *p, double *z, int n);
12 | void ztop (double *p, double *z, int n);
13 | double logregressit (double *ans, double *eq, double **rhs, int neq, int nv);
14 | double logrscore (double *eq, double **rhs, int neq, int nv);
15 | 
16 | void calcgh (double *grad, double *hess, double *eq, double *z,
17 | 	     double *n0, double *n1, int neq, int nv);
18 | 
19 | double zlike (double *eq, double *n0, double *n1,
20 | 	      double *ans, int neq, int nv);
21 | 
22 | void squish (double *xmat, double *mat, int nrow, int oldc, int newc);
23 | 
24 | void
25 | calcres (double *res, double *ans, double *eq, double *rhs, int neq, int nv);
26 | 


--------------------------------------------------------------------------------
/include/mcmcpars.h:
--------------------------------------------------------------------------------
 1 | 
 2 | double thp1 = 1.0, thp2 = 5.0;	/* params for theta */
 3 | double thxp1 = 1.0, thxp2 = 10.0;	/* params for theta X  */
 4 | double thxp0 = 40;		/* cross term */
 5 | /* -1 is default value (=0 logically)  */
 6 | 
 7 | double lp1 = 10.2, lp2 = 2;
 8 | double lxp1 = 10.2, lxp2 = 2;
 9 | 
10 | double priorlmean = 6;
11 | double priorlmsig = 5;
12 | /* hyperprior on mean, s.dev for gamma prior on lambda */
13 | 
14 | double qtrbase = 0.0;
15 | 
16 | double loclip = -20.0;
17 | double hiclip = 15.0;
18 | 
19 | double a1 = 2, b1 = 8;
20 | double aa2 = 2, bb2 = 14, cc2 = 85;
21 | double p1 = 18, psi1 = 3;
22 | /* for toys */
23 | 
24 | double muval = 0.0, tmumean = 0.2, muval1;
25 | /* for bridge sampler */
26 | 
27 | int pubxindiv = -1;
28 | int alkesmode = NO;
29 | int malexhet = NO;
30 | int familynames = YES;
31 | 
32 | int decim = 0, dmindis = 200000, dmaxdis = 500000;	// decimation parameters
33 | int hashcheck = YES;
34 | int outputall = NO;
35 | int sevencolumnped = NO;
36 | 
37 | FILE *fstdetails = NULL;
38 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 
 4 | # MUST put smartpca bin directory in path for smartpca.perl to work
 5 | 
 6 | $command = "smartpca.perl";
 7 | $command .= " -i example.geno ";
 8 | $command .= " -a example.snp ";
 9 | $command .= " -b example.ind " ;
10 | $command .= " -k 2 ";
11 | $command .= " -o example.pca ";
12 | $command .= " -p example.plot ";
13 | $command .= " -e example.eval ";
14 | $command .= " -l example.log ";
15 | $command .= " -m 5 ";
16 | $command .= " -t 2 ";
17 | $command .= " -s 6.0 ";
18 | print("$command\n");
19 | system("$command");
20 | 
21 | $command = "smarteigenstrat.perl "; 
22 | $command .= " -i example.geno ";
23 | $command .= " -a example.snp ";
24 | $command .= " -b example.ind ";
25 | $command .= " -p example.pca ";
26 | $command .= " -k 1 ";
27 | $command .= " -o example.chisq ";
28 | $command .= " -l example.log ";
29 | print("$command\n");
30 | system("$command");
31 | 
32 | $command = "gc.perl example.chisq example.chisq.GC";
33 | print("$command\n");
34 | system("$command");
35 | 


--------------------------------------------------------------------------------
/include/xpsubs.h:
--------------------------------------------------------------------------------
 1 | #include <nicklib.h>
 2 | #include <math.h>
 3 | #include "admutils.h"
 4 | 
 5 | extern int verbose;
 6 | 
 7 | double xpest (double **gg, int *gobs, int *na, int *nb,
 8 | 	      int neq, double *ppa, double *ppb);
 9 | 
10 | void mk2from3ml (double *xd, double *xc, double p, double pp);
11 | void mk2from2 (double *xd, double *xc);
12 | 
13 | void loadpprob (double *pprob, double pa, double pb);
14 | void gen3 (double *ww, double a, double b);
15 | 
16 | double xpest2like (double **gg, int *gobs, int *na, int *nb,
17 | 		   int *iscasearr,
18 | 		   int neq, double ppa, double ppb, double risk);
19 | 
20 | 
21 | // clean up SANS when finalized 
22 | 
23 | typedef struct
24 | {
25 |   SNP *cupt;
26 |   int numsamps;
27 |   double admbayessc;
28 |   double *baymodelsc;
29 |   double admfsc;
30 |   double admzscore;
31 |   double admbsc;
32 |   double admfccsc;
33 |   double simpsc[2];
34 |   double admyl[3];
35 |   double lrmax;
36 |   double lrsig;
37 |   double maxlod;
38 | /* now start of fine-mapping scores */
39 |   double gscore;
40 |   double gcheck;
41 |   double gbayes;
42 | } SANS;
43 | 


--------------------------------------------------------------------------------
/include/not-thread-h:
--------------------------------------------------------------------------------
 1 | #ifndef _THREAD_ 
 2 | #define _THREAD_
 3 | 
 4 | #include <admutils.h>
 5 | 
 6 | // globals defined in smartpca.c
 7 | extern int ldregress;
 8 | extern int minallelecnt;
 9 | extern int maxmissing;
10 | extern int numthreads;
11 | extern double ldlimit;
12 | 
13 | extern pthread_mutex_t mutex_xtx;
14 | extern pthread_mutex_t mutex_nkill;
15 | 
16 | 
17 | typedef struct thread_args  {
18 | 
19 |   int mythreadnum;
20 | 
21 |   SNP **xsnplist;
22 |   double *xmean;
23 |   double *xfancy;
24 |   int *xindex;
25 |   int *xtypes;
26 | 
27 |   int numindivs;
28 |   int nrows;
29 |   int ncols;
30 |   int blocksize;
31 |   int weightmode;
32 | 
33 |   int nkill;     // out
34 |   int nused;     // out
35 | 
36 |   double *XTX;
37 | 
38 | } thread_args_t;
39 | 
40 | 
41 | void *thread_function(void *args);
42 | 
43 | thread_args_t *pack_args(int mythreadnum, SNP **xsnplist, double *xmean, double *xfancy, int *xindex, int *xtypes, double *XTX, 
44 |   int numindivs, int nrows, int ncols, int blocksize, int weightmode); 
45 | 
46 | void unpack_args(thread_args_t *ta, int *nkill, int *nused);
47 | 
48 | #endif
49 | 
50 | 


--------------------------------------------------------------------------------
/EIGENSTRAT/example.QTL.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | $ENV{'PATH'} = "../bin:$ENV{'PATH'}"; 
 4 | # MUST put smartpca bin directory in path for smartpca.perl to work
 5 | 
 6 | $command = "smartpca.perl";
 7 | $command .= " -i example.geno ";
 8 | $command .= " -a example.snp ";
 9 | $command .= " -b example.QTL.ind " ;
10 | $command .= " -k 2 ";
11 | $command .= " -o example.pca ";
12 | $command .= " -p example.plot ";
13 | $command .= " -e example.eval ";
14 | $command .= " -l example.log ";
15 | $command .= " -m 5 ";
16 | $command .= " -t 2 ";
17 | $command .= " -s 6.0 ";
18 | $command .= " -q YES ";
19 | print("$command\n");
20 | system("$command");
21 | 
22 | $command = "smarteigenstrat.perl "; 
23 | $command .= " -i example.geno ";
24 | $command .= " -a example.snp ";
25 | $command .= " -b example.QTL.ind ";
26 | $command .= " -p example.pca ";
27 | $command .= " -k 1 ";
28 | $command .= " -o example.QTL.chisq ";
29 | $command .= " -l example.log ";
30 | $command .= " -q YES ";
31 | print("$command\n");
32 | system("$command");
33 | 
34 | $command = "gc.perl example.QTL.chisq example.QTL.chisq.GC";
35 | print("$command\n");
36 | system("$command");
37 | 


--------------------------------------------------------------------------------
/src/eigensrc/pcatoy.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <nicklib.h>
 3 | #include <stdlib.h>
 4 | #include "eigsubs.h"
 5 | #include <unistd.h>
 6 | 
 7 | int
 8 | main ()
 9 | {
10 |   int NSAMPLES, n, k;
11 |   double *eval, *evec, *XTX;
12 | 
13 |   NSAMPLES = 2;
14 | 
15 |   /* malloc */
16 |   if ((eval = (double *) malloc (NSAMPLES * sizeof (*eval))) == NULL) {
17 |     fprintf (stderr, "CM\n");
18 |     exit (1);
19 |   }
20 |   if ((evec =
21 |        (double *) malloc (NSAMPLES * NSAMPLES * sizeof (*evec))) == NULL) {
22 |     fprintf (stderr, "CM\n");
23 |     exit (1);
24 |   }
25 |   if ((XTX = (double *) malloc (NSAMPLES * NSAMPLES * sizeof (*XTX))) == NULL) {
26 |     fprintf (stderr, "CM\n");
27 |     exit (1);
28 |   }
29 | 
30 |   XTX[0] = 1;
31 |   XTX[1] = 0;
32 |   XTX[2] = 0;
33 |   XTX[3] = 1;			/* 2x2 identity matrix */
34 | 
35 |   eigvecs (XTX, eval, evec, NSAMPLES);	/* eigenvector k is evec[k*NSAMPLES+n] */
36 | 
37 |   /* print eval and evec */
38 |   printf ("The eigenvectors of the 2x2 identity matrix are:\n");
39 |   for (n = 0; n < NSAMPLES; n++) {
40 |     for (k = 0; k < NSAMPLES; k++) {
41 |       printf (" ");
42 |       printf ("%.02f", evec[k * NSAMPLES + n]);
43 |     }
44 |     printf ("\n");
45 |   }
46 |   return 0;
47 | }
48 | 


--------------------------------------------------------------------------------
/include/kjg_fpca.h:
--------------------------------------------------------------------------------
 1 | /** @file kjg_fpca.h
 2 |  * @brief Runs fastPCA.
 3 |  * This module also has methods to multiply a genotype matrix against the GSL
 4 |  * matrices.
 5 |  */
 6 | 
 7 | #ifndef KJG_FPCA_H_
 8 | #define KJG_FPCA_H_
 9 | 
10 | #include <gsl/gsl_matrix.h>
11 | 
12 | extern size_t KJG_FPCA_ROWS;	// number of rows to process at once
13 | 
14 | /** Performs a fast PCA
15 |  * @param *eval eigenvalues
16 |  * @param *evec eigenvectors
17 |  * @param K number of eigenvalues/vectors
18 |  * @param L width of projection matrix
19 |  * @param I iterations to do exponentiation
20 |  */
21 | 
22 | void kjg_fpca (size_t K, size_t L, size_t I, double *eval, double *evec);
23 | 
24 | /** Multiplies B=X*A1 and A2 = XT*B = XT*X*A1
25 |  * @param *A1 some matrix
26 |  * @param *B intermediate matrix
27 |  * @param *A2 next matrix
28 |  */
29 | 
30 | void kjg_fpca_XTXA (const gsl_matrix * A1, gsl_matrix * B, gsl_matrix * A2);
31 | 
32 | /** Multiplies B = X*A
33 |  * @param *A some matrix
34 |  * @param *B another matrix
35 |  */
36 | 
37 | void kjg_fpca_XA (const gsl_matrix * A, gsl_matrix * B);
38 | 
39 | /** Multiplies A = XT*B
40 |  * @param *B some matrix
41 |  * @param *A another matrix
42 |  */
43 | 
44 | void kjg_fpca_XTB (const gsl_matrix * B, gsl_matrix * A);
45 | 
46 | #endif /* KJG_FPCA_H_ */
47 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/q1:
--------------------------------------------------------------------------------
 1 |     0.950  ::sample:                I3122
 2 |    -0.094106295    -0.019737790 
 3 |    -0.094106436    -0.019739079 
 4 |     0.000059549     0.000003096
 5 |     0.000003096     0.000145052
 6 | ellcoords: -0.0941 -0.0197     0.029491478     0.018871080    -1.534647054 ::     0.950
 7 | sample:               I11442
 8 |    -0.061346222    -0.019552895 
 9 |    -0.061343762    -0.019555191 
10 |     0.000060178     0.000012115
11 |     0.000012115     0.000150393
12 | ellcoords: -0.0613 -0.0196     0.030177005     0.018734409    -1.439596300 ::     0.950
13 | sample:                I7796
14 |    -0.046873995    -0.045907875 
15 |    -0.046872172    -0.045907252 
16 |     0.000123822     0.000000395
17 |     0.000000395     0.000280934
18 | ellcoords: -0.0469 -0.0459     0.041026961     0.027237240    -1.568280598 ::     0.950
19 | sample:                I7800
20 |    -0.066846646    -0.031999605 
21 |    -0.066842606    -0.031999713 
22 |     0.000320876    -0.000001283
23 |    -0.000001283     0.000713629
24 | ellcoords: -0.0668 -0.0320     0.065388904     0.043846220     1.567529274 ::     0.950
25 | sample:                I7807
26 |    -0.073119168    -0.032956599 
27 |    -0.073121562    -0.032957419 
28 |     0.000118724     0.000007414
29 |     0.000007414     0.000275285
30 | ellcoords: -0.0731 -0.0330     0.040638127     0.026631378    -1.523582242 ::     0.950
31 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/q2:
--------------------------------------------------------------------------------
 1 |     0.990 ::  sicaa.evec
 2 | sample:                I3122
 3 |    -0.094106295    -0.019737790 
 4 |    -0.094106436    -0.019739079 
 5 |     0.000091541     0.000004759
 6 |     0.000004759     0.000222980
 7 | ellcoords: -0.0941 -0.0197     0.036565194     0.023397427    -1.534647054 ::     0.990
 8 | sample:               I11442
 9 |    -0.061346222    -0.019552895 
10 |    -0.061343762    -0.019555191 
11 |     0.000092508     0.000018624
12 |     0.000018624     0.000231191
13 | ellcoords: -0.0613 -0.0196     0.037415149     0.023227974    -1.439596300 ::     0.990
14 | sample:                I7796
15 |    -0.046873995    -0.045907875 
16 |    -0.046872172    -0.045907252 
17 |     0.000190345     0.000000607
18 |     0.000000607     0.000431864
19 | ellcoords: -0.0469 -0.0459     0.050867535     0.033770263    -1.568280598 ::     0.990
20 | sample:                I7800
21 |    -0.066846646    -0.031999605 
22 |    -0.066842606    -0.031999713 
23 |     0.000493265    -0.000001972
24 |    -0.000001972     0.001097022
25 | ellcoords: -0.0668 -0.0320     0.081072843     0.054363011     1.567529274 ::     0.990
26 | sample:                I7807
27 |    -0.073119168    -0.032956599 
28 |    -0.073121562    -0.032957419 
29 |     0.000182508     0.000011397
30 |     0.000011397     0.000423180
31 | ellcoords: -0.0731 -0.0330     0.050385437     0.033019081    -1.523582242 ::     0.990
32 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/q3:
--------------------------------------------------------------------------------
 1 |     0.950 ::  sicaa.evec
 2 | sample:                I3122
 3 |    -0.094106295    -0.019737790 
 4 |    -0.094106436    -0.019739079 
 5 |     0.000059549     0.000003096
 6 |     0.000003096     0.000145052
 7 | ellcoords: -0.0941 -0.0197     0.029491478     0.018871080    -1.534647054 ::     0.950
 8 | sample:               I11442
 9 |    -0.061346222    -0.019552895 
10 |    -0.061343762    -0.019555191 
11 |     0.000060178     0.000012115
12 |     0.000012115     0.000150393
13 | ellcoords: -0.0613 -0.0196     0.030177005     0.018734409    -1.439596300 ::     0.950
14 | sample:                I7796
15 |    -0.046873995    -0.045907875 
16 |    -0.046872172    -0.045907252 
17 |     0.000123822     0.000000395
18 |     0.000000395     0.000280934
19 | ellcoords: -0.0469 -0.0459     0.041026961     0.027237240    -1.568280598 ::     0.950
20 | sample:                I7800
21 |    -0.066846646    -0.031999605 
22 |    -0.066842606    -0.031999713 
23 |     0.000320876    -0.000001283
24 |    -0.000001283     0.000713629
25 | ellcoords: -0.0668 -0.0320     0.065388904     0.043846220     1.567529274 ::     0.950
26 | sample:                I7807
27 |    -0.073119168    -0.032956599 
28 |    -0.073121562    -0.032957419 
29 |     0.000118724     0.000007414
30 |     0.000007414     0.000275285
31 | ellcoords: -0.0731 -0.0330     0.040638127     0.026631378    -1.523582242 ::     0.950
32 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/aaa.xtxt:
--------------------------------------------------------------------------------
 1 | set terminal postscript color noenhanced
 2 | set title  " Sicily EBA projected conf: 0.95" 
 3 | set key outside
 4 | set xlabel  "eigenvector 1" 
 5 | set ylabel  "eigenvector 2" 
 6 | set parametric
 7 | fx(a, b, phi, m0, t) = a*cos(phi)*cos(t)+b*sin(phi)*sin(t)  + m0
 8 | fy(a, b, phi, m1, t) = -a*sin(phi)*cos(t)+b*cos(phi)*sin(t)  + m1
 9 | plot  "aaa:Lithuanian" using ($2)*1:($3)*1 title "Lithuanian" , \
10 |  "aaa:Georgian" using ($2)*1:($3)*1 title "Georgian" , \
11 |  "aaa:Sardinian" using ($2)*1:($3)*1 title "Sardinian" , \
12 |  "aaa:Sicily_EBA" using ($2)*1:($3)*1 title "Sicily_EBA" lt rgb "black" , \
13 |  m0 = -0.0941, m1 = -0.0197, a = 0.029491478, b = 0.018871080, phi = -1.534647054, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I3122", \
14 |  m0 = -0.0613, m1 = -0.0196, a = 0.030177005, b = 0.018734409, phi = -1.439596300, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I11442", \
15 |  m0 = -0.0469, m1 = -0.0459, a = 0.041026961, b = 0.027237240, phi = -1.568280598, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I7796", \
16 |  m0 = -0.0668, m1 = -0.0320, a = 0.065388904, b = 0.043846220, phi = 1.567529274, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I7800", \
17 |  m0 = -0.0731, m1 = -0.0330, a = 0.040638127, b = 0.026631378, phi = -1.523582242, fx(a, b, phi, m0, t), fy(a, b, phi, m1, t) with lines title "I7807"
18 | ## pause 9999
19 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/q0:
--------------------------------------------------------------------------------
 1 |     0.950 :: sicaa.evec 
 2 | sample:                I3122
 3 |    -0.094106295    -0.019737790 
 4 |    -0.094106436    -0.019739079 
 5 | 
 6 |     0.000059549     0.000003096 
 7 |     0.000003096     0.000145052 
 8 | ellcoords:    -0.0941    -0.0197     0.029491478     0.018871080    -1.534647054  ::     0.950
 9 | sample:               I11442
10 |    -0.061346222    -0.019552895 
11 |    -0.061343762    -0.019555191 
12 | 
13 |     0.000060178     0.000012115 
14 |     0.000012115     0.000150393 
15 | ellcoords:    -0.0613    -0.0196     0.030177005     0.018734409    -1.439596300  ::     0.950
16 | sample:                I7796
17 |    -0.046873995    -0.045907875 
18 |    -0.046872172    -0.045907252 
19 | 
20 |     0.000123822     0.000000395 
21 |     0.000000395     0.000280934 
22 | ellcoords:    -0.0469    -0.0459     0.041026961     0.027237240    -1.568280598  ::     0.950
23 | sample:                I7800
24 |    -0.066846646    -0.031999605 
25 |    -0.066842606    -0.031999713 
26 | 
27 |     0.000320876    -0.000001283 
28 |    -0.000001283     0.000713629 
29 | ellcoords:    -0.0668    -0.0320     0.065388904     0.043846220     1.567529274  ::     0.950
30 | sample:                I7807
31 |    -0.073119168    -0.032956599 
32 |    -0.073121562    -0.032957419 
33 | 
34 |     0.000118724     0.000007414 
35 |     0.000007414     0.000275285 
36 | ellcoords:    -0.0731    -0.0330     0.040638127     0.026631378    -1.523582242  ::     0.950
37 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/aaa:Georgian:
--------------------------------------------------------------------------------
 1 |              GEO-002     0.124960     -0.019309         Georgian
 2 |              GEO-005     0.098987     -0.025584         Georgian
 3 |              GEO-010     0.124701     -0.043397         Georgian
 4 |              GEO-015     0.131265     -0.027730         Georgian
 5 |              GEO-020     0.135953     -0.053088         Georgian
 6 |              GEO-028     0.134414     -0.036569         Georgian
 7 |              GEO-031     0.157799     -0.025231         Georgian
 8 |              GEO-032     0.146574     -0.026196         Georgian
 9 |              GEO-039     0.158120     -0.022020         Georgian
10 |              GEO-051     0.144490     -0.042876         Georgian
11 |              GEO-061     0.153842     -0.030906         Georgian
12 |              GEO-082     0.158142     -0.028626         Georgian
13 |               GEO001     0.156173     -0.043278         Georgian
14 |                 mg43     0.159324     -0.022091         Georgian
15 |                 mg47     0.140898     -0.038999         Georgian
16 |                 mg22     0.138969     -0.035963         Georgian
17 |                 mg49     0.153820     -0.036044         Georgian
18 |                 mg23     0.140328     -0.032225         Georgian
19 |                 mg62     0.158555     -0.026727         Georgian
20 |                 mg27     0.154282     -0.028294         Georgian
21 |                 mg31     0.141187     -0.024043         Georgian
22 |                 mg34     0.149388     -0.030698         Georgian
23 |                 mg40     0.162562     -0.032568         Georgian
24 | 


--------------------------------------------------------------------------------
/src/nicksrc/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2009-2016, Broad Institute, Inc.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | *
 8 |  Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 
12 | *
13 |  Redistributions in binary form must reproduce the above copyright notice,
14 |  this list of conditions and the following disclaimer in the documentation
15 |  and/or other materials provided with the distribution.
16 | 
17 | 
18 | *
19 |  Neither the name Broad Institute, Inc. nor the names of its
20 |  contributors may be used to endorse or promote products derived from
21 |  this software without specific prior written permission.
22 | 
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
33 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2006-2016, Broad Institute, Inc. and Harvard Medical School
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | *
 8 |  Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 
12 | *
13 |  Redistributions in binary form must reproduce the above copyright notice,
14 |  this list of conditions and the following disclaimer in the documentation
15 |  and/or other materials provided with the distribution.
16 | 
17 | 
18 | *
19 |  Neither the name Broad Institute, Inc. Harvard University, nor the names of its
20 |  contributors may be used to endorse or promote products derived from
21 |  this software without specific prior written permission.
22 | 
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
33 | 


--------------------------------------------------------------------------------
/src/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2006-2016, Broad Institute, Inc. and Harvard Medical School
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | *
 8 |  Redistributions of source code must retain the above copyright notice,
 9 | this list of conditions and the following disclaimer.
10 | 
11 | 
12 | *
13 |  Redistributions in binary form must reproduce the above copyright notice,
14 |  this list of conditions and the following disclaimer in the documentation
15 |  and/or other materials provided with the distribution.
16 | 
17 | 
18 | *
19 |  Neither the name Broad Institute, Inc. Harvard University, nor the names of its
20 |  contributors may be used to endorse or promote products derived from
21 |  this software without specific prior written permission.
22 | 
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
33 | 


--------------------------------------------------------------------------------
/bin/evec2pca.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | ### translate .evec file to .pca file expected by eigenstrat program
 4 | ### Note: .evec file does not contain entries for outliers
 5 | ###       .pca  file does contain entries (set to all 0.0) for outliers
 6 | 
 7 | $k = $ARGV[0];
 8 | $evec = $ARGV[1]; 
 9 | $ind = $ARGV[2];  
10 | $pca = $ARGV[3];  
11 | 
12 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading");
13 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading");
14 | open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing");
15 | 
16 | print PCA ("$k\n"); # number of output eigenvectors/eigenvalues
17 | $line = <EVEC>; chomp($line); # eigvals line
18 | my @array = split(/[\t ]+/,$line);
19 | for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval
20 | while($line = <EVEC>)
21 | {
22 |   chomp($line);
23 |   $line = " " . $line;
24 |   my @array = split(/[\t ]+/,$line);
25 |   $l = @array; 
26 |   unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); }
27 |   $sample = $array[1];
28 |   for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; }
29 |   $found{$sample} = 1;
30 | }
31 | 
32 | while($line = <IND>)
33 | {
34 |   chomp($line);
35 |   my @array = split(/[\t ]+/,$line);
36 |   $sample = $array[0];
37 |   if($sample eq "") { $sample = $array[1]; }
38 |   unless($found{$sample})
39 |   {
40 |     for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; }
41 |   }
42 |   for($x=0; $x<$k; $x++)
43 |   {
44 |     printf PCA (" ");
45 |     if($evecarray{$sample}[$x] > 0) { printf PCA (" "); }
46 |     printf PCA ("%.04f",$evecarray{$sample}[$x]);
47 |   }
48 |   printf PCA ("\n");
49 | }
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/CONVERTF/example.ancestrymapgeno:
--------------------------------------------------------------------------------
 1 |               rs0000              SAMPLE0   1
 2 |               rs0000              SAMPLE1   1
 3 |               rs0000              SAMPLE2   1
 4 |               rs0000              SAMPLE3   0
 5 |               rs0000              SAMPLE4   0
 6 |               rs1111              SAMPLE0   0
 7 |               rs1111              SAMPLE1   1
 8 |               rs1111              SAMPLE2   2
 9 |               rs1111              SAMPLE3   1
10 |               rs1111              SAMPLE4   2
11 |               rs2222              SAMPLE0   2
12 |               rs2222              SAMPLE1   1
13 |               rs2222              SAMPLE2   1
14 |               rs2222              SAMPLE3   0
15 |               rs2222              SAMPLE4   1
16 |               rs3333              SAMPLE0   0
17 |               rs3333              SAMPLE1   0
18 |               rs3333              SAMPLE2   1
19 |               rs3333              SAMPLE3   2
20 |               rs3333              SAMPLE4   2
21 |               rs4444              SAMPLE0   2
22 |               rs4444              SAMPLE1   1
23 |               rs4444              SAMPLE2   1
24 |               rs4444              SAMPLE3   0
25 |               rs4444              SAMPLE4   0
26 |               rs5555              SAMPLE0   0
27 |               rs5555              SAMPLE1   0
28 |               rs5555              SAMPLE2   1
29 |               rs5555              SAMPLE3   1
30 |               rs5555              SAMPLE4   1
31 |               rs6666              SAMPLE0   2
32 |               rs6666              SAMPLE1   2
33 |               rs6666              SAMPLE2   1
34 |               rs6666              SAMPLE3   1
35 |               rs6666              SAMPLE4   0
36 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/aaa:Sardinian:
--------------------------------------------------------------------------------
 1 |            HGDP00666    -0.116760     -0.030823        Sardinian
 2 |            HGDP00667    -0.116709     -0.017192        Sardinian
 3 |            HGDP00668    -0.137047     -0.027317        Sardinian
 4 |            HGDP00669    -0.126519     -0.024759        Sardinian
 5 |            HGDP00670    -0.135795     -0.031889        Sardinian
 6 |            HGDP00671    -0.128935     -0.029534        Sardinian
 7 |            HGDP00672    -0.124405     -0.032339        Sardinian
 8 |            HGDP00673    -0.120324     -0.035232        Sardinian
 9 |            HGDP00674    -0.137120     -0.026784        Sardinian
10 |            HGDP01062    -0.119201     -0.035362        Sardinian
11 |            HGDP01063    -0.141085     -0.036116        Sardinian
12 |            HGDP01064    -0.153843     -0.049779        Sardinian
13 |            HGDP01065    -0.145904     -0.024355        Sardinian
14 |            HGDP01066    -0.157103     -0.027543        Sardinian
15 |            HGDP01067    -0.144155     -0.050578        Sardinian
16 |            HGDP01068    -0.130147     -0.039257        Sardinian
17 |            HGDP01069    -0.140198     -0.030983        Sardinian
18 |            HGDP01070    -0.138745     -0.032343        Sardinian
19 |            HGDP01071    -0.116760     -0.041106        Sardinian
20 |            HGDP01072    -0.127844     -0.040682        Sardinian
21 |            HGDP01073    -0.143137     -0.035532        Sardinian
22 |            HGDP01074    -0.138522     -0.032142        Sardinian
23 |            HGDP01075    -0.145153     -0.023029        Sardinian
24 |            HGDP01076    -0.136974     -0.040912        Sardinian
25 |            HGDP01077    -0.119744     -0.032696        Sardinian
26 |            HGDP01078    -0.124987     -0.031961        Sardinian
27 |            HGDP01079    -0.128279     -0.021665        Sardinian
28 | 


--------------------------------------------------------------------------------
/include/linsubs.h:
--------------------------------------------------------------------------------
 1 | #include  <stdio.h>
 2 | #include <string.h>
 3 | #include <unistd.h>
 4 | #include <math.h>
 5 | 
 6 | void bal(double *a, double *b, int n) ;
 7 | 
 8 | /* linear algebra */
 9 | void mulmat(double *a, double *b, double *c, int a1, int a2, int a3) ;
10 | void xmultx(double *a, double *b, int m, int n) ;
11 | void txmulx(double *a, double *b, int m, int n) ;
12 | 
13 | int solvit (double *prod, double *rhs,int n, double *ans);
14 | int  solvitfix (double *prod, double *rhs, int n, double *ans, int *vfix, double *vvals, int nfix) ;
15 | int  oldsolvitfix (double *prod, double *rhs, int n, double *ans, int *vfix, double *vvals, int nfix) ;
16 | double pdinv(double *cinv, double *coeff, int n) ;
17 | 
18 | /* numer recipes p 97 */
19 | double logdet(double *mat, int n) ;
20 | int choldc (double *a, int n, double p[]);
21 | void cholsl (double *a, int n, double p[], double b[], double x[]);
22 | int isposdef (double *a, int n) ;
23 | void cholesky(double *cf, double *a, int n) ;
24 | void pmat(double *mat, int n)   ;
25 | void imulmat(int *a, int *b, int *c, int a1, int a2, int a3) ;
26 | int linsolv(int n, double* pfMatr, double* pfVect, double* sol) ; // Developer: Henry Guennadi Levkin
27 | 
28 | double qval(double *vv, double *q, double *l, int n) ; 
29 | void qgrad(double *grad, double *vv, double *q, double *l, int n) ;
30 | double mquad(double y0, double y1, double y2, double *pmx) ;
31 | double qminpos(double *vv, double *q, double *l, int n)  ;
32 | double qminposfix(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix)  ;
33 | double qminposfixc(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix, int *constraint)  ;
34 | double qmin(double *vv, double *q, double *l, int n) ;
35 | double qminfix(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix)  ;;
36 | double qmpc (double *vnew, double *vold, double *q, double *l, int *dead, int level, int *constraint, int n) ;
37 | double qmp (double *vnew, double *vold, double *q, double *l, int *dead, int level, int n) ; 
38 | 


--------------------------------------------------------------------------------
/POPGEN/example.log:
--------------------------------------------------------------------------------
 1 | parameter file: par.example
 2 | ### THE INPUT PARAMETERS
 3 | ##PARAMETER NAME: VALUE
 4 | genotypename: ../CONVERTF/example.ped
 5 | snpname: ../CONVERTF/example.map
 6 | indivname: ../CONVERTF/example.ped
 7 | evecoutname: example.evec
 8 | evaloutname: example.eval
 9 | altnormstyle: NO
10 | numoutevec: 2
11 | familynames: NO
12 | grmoutname: grmjunk
13 | ## smartpca version: 13050
14 | norm used
15 | 
16 | genotype file processed
17 | number of samples used: 5 number of snps used: 7
18 | Using 2 threads, and partial sum lookup algorithm.
19 | total number of snps killed in pass: 0  used: 7
20 | grm dumped
21 | 
22 | ## To get Tracy-Widom statistics: recompile smartpca with TWTAB correctly specified in Makefile, or
23 |    just run twstats (see README file in POPGEN directory)
24 |     kurtosis           snps    indivs
25 |  eigenvector    1     1.271     1.505
26 |  eigenvector    2     1.796     2.127
27 | population:   0                 Case    2
28 | population:   1              Control    3
29 | 
30 | ## Average divergence between populations:
31 |                  Case    Control     popsize
32 |       Case      0.849      3.298           2
33 |    Control      3.298      1.151           3
34 | 
35 | 
36 | number of blocks for moving block jackknife: 1
37 | fst *1000:
38 |           C     C 
39 |    C      0     0
40 |    C      0     0
41 | 
42 | s.dev * 1000000:
43 |           C     C 
44 |    C      0     0
45 |    C      0     0
46 | 
47 | eigenvector 1:means
48 |              Control     -0.336
49 |                 Case      0.505
50 | ## Anova statistics for population differences along each eigenvector:
51 |                                               p-value
52 |              eigenvector_1_Case_Control_     0.0262738 
53 | eigenvector 2:means
54 |              Control     -0.056
55 |                 Case      0.084
56 |              eigenvector_2_Case_Control_      0.805663 
57 | 
58 | ## Statistical significance of differences beween populations:
59 |                                 pop1                  pop2      chisq          p-value   |pop1|   |pop2|
60 | popdifference:                  Case               Control         9.554    0.00842201       2       3
61 | 
62 | ##end of smartpca run
63 | 


--------------------------------------------------------------------------------
/bin/gc.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | $P = $ARGV[0];
 4 | $out = $ARGV[1];
 5 | 
 6 | # get data
 7 | $m=0;
 8 | open(P,"$P") || die("COF");
 9 | while($line = <P>) { if($line =~ /Chisq/) { last; } } # header lines
10 | while($line = <P>)
11 | {
12 |   chomp($line);
13 |   if($line =~ /NA/) 
14 |   {
15 |     $chisq1[$m] = -100;
16 |     $chisq2[$m] = -100;
17 |     $m++;  
18 |     next;
19 |   }
20 |   my @array = split(/[\t ]+/,$line);
21 |   $chisq1[$m] = $array[0]; # Chisq
22 |   $chisq2[$m] = $array[1]; # EIGENSTRAT
23 |   $m++; 
24 |   $mvalid++;
25 | }
26 | close(P);
27 | $nSNP = $m;
28 | $nSNPvalid = $mvalid;
29 | 
30 | # compute $lambda1 (Chisq)
31 | $CHISQTHRESH = 0.456;
32 | $step = 0.25;
33 | $oktoreducestep = 0;
34 | for($iter=0; $iter<20; $iter++)
35 | {
36 |   $mm = 0;
37 |   for($m=0; $m<$nSNP; $m++) 
38 |   { 
39 |     if($chisq1[$m] > $CHISQTHRESH) { $mm++; } 
40 |   }
41 |   $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH
42 |   if($frac > 0.5) { $CHISQTHRESH += $step; }
43 |   else { $CHISQTHRESH -= $step; $oktoreducestep = 1; }
44 |   if($oktoreducestep) { $step *= 0.5; }
45 | }
46 | $lambda1 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation
47 | if($lambda1 < 1) { $lambda1 = 1; } # not allowed to be less than 1
48 | 
49 | # compute $lambda2 (EIGENSTRAT)
50 | $CHISQTHRESH = 0.456;
51 | $step = 0.25;
52 | $oktoreducestep = 1;
53 | for($iter=0; $iter<20; $iter++)
54 | {
55 |   $mm = 0;
56 |   for($m=0; $m<$nSNP; $m++) 
57 |   { 
58 |     if($chisq2[$m] > $CHISQTHRESH) { $mm++; } 
59 |   }
60 |   $frac = $mm/$nSNPvalid; # frac of SNPs exceeding CHISQTHRESH
61 |   if($frac > 0.5) { $CHISQTHRESH += $step; }
62 |   else { $CHISQTHRESH -= $step; $oktoreducestep = 1; }
63 |   if($oktoreducestep) { $step *= 0.5; }
64 | }
65 | $lambda2 = $CHISQTHRESH/0.456; # 0.456 is median if no inflation
66 | if($lambda2 < 1) { $lambda2 = 1; } # not allowed to be less than 1
67 | 
68 | # output
69 | open(OUT,">$out") || die("COF");
70 | print OUT ("Chisq EIGENSTRAT\n");
71 | printf OUT ("lambda=%.03f lambda=%.03f\n",$lambda1,$lambda2);
72 | for($m=0; $m<$nSNP; $m++)
73 | {
74 |   if($chisq1[$m] < 0) { print OUT ("NA NA\n"); next; }
75 |   printf OUT ("%.04f %.04f\n",$chisq1[$m]/$lambda1,$chisq2[$m]/$lambda2);
76 | }
77 | 


--------------------------------------------------------------------------------
/POPGEN/twexample.eval:
--------------------------------------------------------------------------------
  1 | 3.896803 
  2 | 1.823842 
  3 | 1.233871 
  4 | 1.227676 
  5 | 1.212209 
  6 | 1.200049 
  7 | 1.198344 
  8 | 1.188785 
  9 | 1.188666 
 10 | 1.185873 
 11 | 1.178875 
 12 | 1.172266 
 13 | 1.168976 
 14 | 1.168512 
 15 | 1.164908 
 16 | 1.157360 
 17 | 1.149275 
 18 | 1.146300 
 19 | 1.142036 
 20 | 1.137177 
 21 | 1.133412 
 22 | 1.128474 
 23 | 1.123303 
 24 | 1.121108 
 25 | 1.117918 
 26 | 1.112247 
 27 | 1.111754 
 28 | 1.108400 
 29 | 1.101575 
 30 | 1.098672 
 31 | 1.095195 
 32 | 1.092344 
 33 | 1.087750 
 34 | 1.085886 
 35 | 1.083626 
 36 | 1.081931 
 37 | 1.079625 
 38 | 1.078805 
 39 | 1.071742 
 40 | 1.068232 
 41 | 1.067591 
 42 | 1.063070 
 43 | 1.060513 
 44 | 1.058145 
 45 | 1.056450 
 46 | 1.054916 
 47 | 1.050893 
 48 | 1.048894 
 49 | 1.043173 
 50 | 1.042661 
 51 | 1.040887 
 52 | 1.036872 
 53 | 1.034616 
 54 | 1.030792 
 55 | 1.028936 
 56 | 1.027220 
 57 | 1.025316 
 58 | 1.022074 
 59 | 1.016391 
 60 | 1.015529 
 61 | 1.013203 
 62 | 1.008569 
 63 | 1.007275 
 64 | 1.003458 
 65 | 1.001885 
 66 | 1.001046 
 67 | 0.996586 
 68 | 0.995739 
 69 | 0.992099 
 70 | 0.988173 
 71 | 0.985532 
 72 | 0.982139 
 73 | 0.979806 
 74 | 0.976317 
 75 | 0.974724 
 76 | 0.971432 
 77 | 0.970185 
 78 | 0.966816 
 79 | 0.965728 
 80 | 0.962983 
 81 | 0.958791 
 82 | 0.956462 
 83 | 0.954499 
 84 | 0.952054 
 85 | 0.949986 
 86 | 0.945780 
 87 | 0.944417 
 88 | 0.939229 
 89 | 0.937765 
 90 | 0.933657 
 91 | 0.931285 
 92 | 0.928772 
 93 | 0.927551 
 94 | 0.922779 
 95 | 0.919530 
 96 | 0.917899 
 97 | 0.914707 
 98 | 0.909525 
 99 | 0.907726 
100 | 0.902872 
101 | 0.900004 
102 | 0.899288 
103 | 0.898200 
104 | 0.896129 
105 | 0.892450 
106 | 0.889483 
107 | 0.886985 
108 | 0.880826 
109 | 0.880009 
110 | 0.877975 
111 | 0.875729 
112 | 0.870950 
113 | 0.867717 
114 | 0.866428 
115 | 0.864954 
116 | 0.861732 
117 | 0.859268 
118 | 0.858495 
119 | 0.854881 
120 | 0.851066 
121 | 0.846823 
122 | 0.842529 
123 | 0.841246 
124 | 0.838740 
125 | 0.838459 
126 | 0.834131 
127 | 0.832612 
128 | 0.827219 
129 | 0.823981 
130 | 0.821442 
131 | 0.818558 
132 | 0.815683 
133 | 0.814061 
134 | 0.810291 
135 | 0.808223 
136 | 0.806080 
137 | 0.801127 
138 | 0.795730 
139 | 0.793393 
140 | 0.790344 
141 | 0.787822 
142 | 0.784379 
143 | 0.777760 
144 | 0.776245 
145 | 0.774965 
146 | 0.764580 
147 | 0.758267 
148 | 0.747945 
149 | 0.747072 
150 | 


--------------------------------------------------------------------------------
/src/eigensrc/smartsubs.c:
--------------------------------------------------------------------------------
 1 | #include "qpsubs.h"
 2 | #include "eigsubs.h"
 3 | #include "smartsubs.h"
 4 | extern int fancynorm, verbose, plotmode, outnum;
 5 | 
 6 | // static Indiv **indm ;
 7 | // static void wjackestx(double *est, double *sig, double mean, double *jmean, double *jwt, int g)  ;
 8 | // static void wjackvestx(double *vest, double *var, int d, double *mean, double **jmean, double *jwt, int g)  ;
 9 | static int outliermode = 0;
10 | 
11 | void
12 | setoutliermode (int mode)
13 | {
14 |   outliermode = mode;
15 | }
16 | 
17 | int
18 | ridoutlier (double *evecs, int n, int neigs,
19 | 	    double thresh, int *badlist, OUTLINFO ** outinfo)
20 | {
21 | /* badlist contains list of outliers */
22 |   double *ww, *w2, y1, y2, yy, zz;
23 |   int *vbad;
24 |   int i, j;
25 |   int nbad = 0;
26 |   OUTLINFO *outpt;
27 | 
28 |   if (outliermode > 1)
29 |     return 0;
30 |   if (n < 3)
31 |     return 0;
32 |   ZALLOC (ww, n, double);
33 |   ZALLOC (vbad, n, int);
34 |   for (j = 0; j < n; j++) {
35 |     outpt = outinfo[j];
36 |     outpt->vecno = -1;
37 |   }
38 |   for (i = 0; i < neigs; ++i) {
39 |     copyarr (evecs + i * n, ww, n);
40 |     if (outliermode == 0) {
41 |       y1 = asum (ww, n) / (double) n;
42 |       vsp (ww, ww, -y1, n);
43 |       y2 = asum2 (ww, n) / (double) n;
44 |       y2 = sqrt (y2);
45 |       vst (ww, ww, 1.0 / y2, n);
46 | 
47 |       for (j = 0; j < n; j++) {
48 | 	if (fabs (ww[j]) > thresh) {
49 | 	  vbad[j] = 1;
50 | 	  outpt = outinfo[j];
51 | 	  if (outpt->vecno < 0) {
52 | 	    outpt->vecno = i;
53 | 	    outpt->score = ww[j];
54 | 	  }
55 | 	}
56 |       }
57 |     }
58 |     if (outliermode == 1) {
59 |       ZALLOC (w2, n, double);
60 |       for (j = 0; j < n; j++) {
61 | 	yy = ww[j];
62 | 	ww[j] = 0;
63 | 	y1 = asum (ww, n) / (double) (n - 1);
64 | 	vsp (w2, ww, -y1, n);
65 | 	w2[j] = 0;
66 | 	y2 = asum2 (w2, n) / (double) n;
67 | 	y2 = sqrt (y2);
68 | 	zz = yy - y1;
69 | 	zz /= y2;
70 | 	if (fabs (zz) > thresh) {
71 | 	  vbad[j] = 1;
72 | 	  outpt = outinfo[j];
73 | 	  if (outpt->vecno < 0) {
74 | 	    outpt->vecno = i;
75 | 	    outpt->score = zz;
76 | 	  }
77 | 	}
78 | 	ww[j] = yy;
79 |       }
80 |       free (w2);
81 |     }
82 |   }
83 |   for (j = 0; j < n; j++) {
84 |     if (vbad[j] == 1) {
85 |       badlist[nbad] = j;
86 |       ++nbad;
87 |     }
88 |   }
89 |   free (ww);
90 |   free (vbad);
91 |   return nbad;
92 | 
93 | }
94 | 


--------------------------------------------------------------------------------
/bin/rescale_ell:
--------------------------------------------------------------------------------
 1 | #!/usr//bin/perl  -w 
 2 | 
 3 | ## rescale -i infile -o outfile -a inscale -b outscale 
 4 | 
 5 | use Getopt::Std ;
 6 | use File::Basename ;
 7 | 
 8 | ## pops : separated  -x = make postscript and pdf  -z use another separator
 9 | ##  -k keep intermediate files
10 | ## NEW if pops is a file names are read one per line
11 | ## scaling on x, y axes 
12 | 
13 | getopts('i:o:a:s:',\%opts) ;
14 | $infile = $opts{"i"} ; 
15 | $outfile = $opts{"o"} ; 
16 | $inscalex = $opts{"a"} ; 
17 | $outscale = $opts{"s"} ; 
18 | 
19 | $inscale = 0.95 ; 
20 | 
21 | $inscale = $inscalex  if (defined $inscalex) ; 
22 | 
23 | usage() unless (defined $infile) ;
24 | usage() unless (-r $infile) ;
25 | usage() unless (defined $outfile) ;
26 | usage() unless (defined $outscale) ;
27 | 
28 | open (I1, "$infile")  ;     
29 | open (YY, ">$outfile") || die "can't open $outfile\n"  ;     
30 | 
31 | $lnum = $junk = -99 ; 
32 | foreach $line (<I1>) { 
33 |  chomp $line ; 
34 |  ($a, $b, , $c, @Z)  = split " ", $line ; 
35 |  next unless (defined $b) ; 
36 |  
37 |  if ($a =~ /sample:/) { 
38 |   $lnum = 0 ; 
39 |   $L[0] = $line ; 
40 |   next ; 
41 |  }
42 |  if ($lnum<0) { 
43 |   $inscale = $a ; 
44 |   printf YY "%9.3f ::  %s\n", $outscale, $c ;  
45 |   next ; 
46 |  }
47 |  ++$lnum ;
48 |  $L[$lnum] = $line ; 
49 |  if ($a =~ /ellcoords:/) { 
50 |   $inscale = pop @Z unless (defined $inscalex) ; 
51 |   $fac2 = critchi($outscale)/critchi($inscale) ;
52 |   $fac =  sqrt($fac2) ;
53 | 
54 |   print YY "$L[0]\n" ; 
55 |   print YY "$L[1]\n" ; 
56 |   print YY "$L[2]\n" ; 
57 |   ($a, $b) = split " ", $L[3] ; $a *=  $fac2; $b *= $fac2 ;  printf YY "%15.9f %15.9f\n", $a, $b ; 
58 |   ($a, $b) = split " ", $L[4] ; $a *=  $fac2; $b *= $fac2 ;  printf YY "%15.9f %15.9f\n", $a, $b ; 
59 |   ($junk, $a, $b, $c, $d, $e) = split " ", $line ;
60 |   print YY "$junk $a $b " ; 
61 |   $c *= $fac ; 
62 |   $d *= $fac ; 
63 |   printf YY "%15.9f ", $c ;
64 |   printf YY "%15.9f ", $d ;
65 |   printf YY "%15.9f ", $e ;
66 |   printf YY ":: %9.3f\n", $outscale ;
67 |   next ; 
68 |  }
69 | }
70 | close YY ;
71 | 
72 | 
73 | sub critchi { 
74 | ## critical value for chisq 2 dof
75 |  my ($scale) = @_ ; 
76 |  local $x ; 
77 |  
78 |  $x = -2*log(1-$scale) ;
79 |  ## printf "zz %12.6f %12.6f\n", $scale, $x ;
80 | 
81 |  return $x ;
82 | 
83 | }
84 | 
85 | sub usage { 
86 |  
87 | print "rescale_ell -i infile -o outfile -s outscale [-a inscale]\n"  ; 
88 | exit 0 ; 
89 | 
90 | }
91 | 


--------------------------------------------------------------------------------
/bin/smarteigenstrat.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # perl wrapper for smarteigenstrat program.  Run smarteigenstrat.perl with no options for usage
 4 | 
 5 | use Getopt::Std;
 6 | 
 7 | my @flaglist = ("i","a","b","o","p","q","l","k");
 8 | $x = @ARGV;
 9 | for($n=0;$n<$x;$n++)  {
10 |   foreach $flag (@flaglist)  {
11 |     if ($ARGV[$n] eq "-$flag")  {
12 |       $specified{$flag} = 1;
13 |     }
14 |   }
15 | }
16 | 
17 | # check for mandatory options
18 | foreach $flag ("i","a","b","p","o","l")  {
19 |   unless ($specified{$flag})  {
20 |     usage();
21 |     die("Error:  -$flag not specified");
22 |   }
23 | }
24 | 
25 | # get opts from hash
26 | getopts('i:a:b:p:o:q:k:l:', \%opts);
27 | $genofilename = $opts{"i"};
28 | $indfilename  = $opts{"b"};
29 | $snpfilename  = $opts{"a"};
30 | $pcafilename  = $opts{"p"};
31 | $outfilename  = $opts{"o"};
32 | $logfilename  = $opts{"l"};
33 | $qtmode = "NO";
34 | if ( $specified{"q"} )  {
35 |   $qtmode = $opts{"q"};
36 | }
37 | $k = 1;
38 | if ( $specified{"k"} )  {
39 |   $k = $opts{"k"};
40 | }
41 | 
42 | # write parameter file
43 | open(PAR, ">$outfilename.par") || die("Error:  unable to open $outfilename.par\n");
44 | print PAR "genotypename:  $genofilename\n";
45 | print PAR "snpname:       $snpfilename\n";
46 | print PAR "indivname:     $indfilename\n";
47 | print PAR "pcaname:       $pcafilename\n";
48 | print PAR "outputname:    $outfilename\n";
49 | print PAR "numpc:         $k\n";
50 | print PAR "qtmode:        $qtmode\n";
51 | close(PAR);
52 | 
53 | # run smarteigenstrat
54 | $cmd = "smarteigenstrat -p $outfilename.par >$logfilename";
55 | print "$cmd\n";
56 | system($cmd);
57 | 
58 | sub usage  {
59 |   print "smarteigenstrat.perl -i <genotypefile>  -a <indivfile>  -b <snpfile>  -p <pcafile>   -o <outputfile>  ";
60 |   print "    -l <logfile>  -k <numpca>  -q <qtlmode>";
61 |   print "\n";
62 |   print "-i  genotype file (PED, PACKEDPED, EIGENSTRAT, ANCESTRYMAP or PACKEDANCESTRYMAP format)";
63 |   print "-o  output file (chisq)\n"; 
64 |   print "-l  logfile (screen output,including error messages)\n";
65 |   print "-q  YES for quantitative phenotype or NO otherwise\n";
66 |   print "\n";
67 |   print "For quantitative phenotype, sixth column of .ped file or third column of EIGENSTRAT .ind file\n";
68 |   print "should be real numbers.  For non-quantitative phenotype, sixth column of .ped or third column\n";
69 |   print "should be 'Case' or 'Control'\n";
70 | }
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/rescale_ell:
--------------------------------------------------------------------------------
 1 | #!/usr//bin/perl  -w 
 2 | 
 3 | ## rescale -i infile -o outfile -a inscale -b outscale 
 4 | 
 5 | use Getopt::Std ;
 6 | use File::Basename ;
 7 | 
 8 | ## pops : separated  -x = make postscript and pdf  -z use another separator
 9 | ##  -k keep intermediate files
10 | ## NEW if pops is a file names are read one per line
11 | ## scaling on x, y axes 
12 | 
13 | getopts('i:o:a:s:',\%opts) ;
14 | $infile = $opts{"i"} ; 
15 | $outfile = $opts{"o"} ; 
16 | $inscalex = $opts{"a"} ; 
17 | $outscale = $opts{"s"} ; 
18 | 
19 | $inscale = 0.95 ; 
20 | 
21 | $inscale = $inscalex  if (defined $inscalex) ; 
22 | 
23 | usage() unless (defined $infile) ;
24 | usage() unless (-r $infile) ;
25 | usage() unless (defined $outfile) ;
26 | usage() unless (defined $outscale) ;
27 | 
28 | open (I1, "$infile")  ;     
29 | open (YY, ">$outfile") || die "can't open $outfile\n"  ;     
30 | 
31 | $lnum = $junk = -99 ; 
32 | foreach $line (<I1>) { 
33 |  chomp $line ; 
34 |  ($a, $b, , $c, @Z)  = split " ", $line ; 
35 |  next unless (defined $b) ; 
36 |  
37 |  if ($a =~ /sample:/) { 
38 |   $lnum = 0 ; 
39 |   $L[0] = $line ; 
40 |   next ; 
41 |  }
42 |  if ($lnum<0) { 
43 |   $inscale = $a ; 
44 |   printf YY "%9.3f ::  %s\n", $outscale, $c ;  
45 |   next ; 
46 |  }
47 |  ++$lnum ;
48 |  $L[$lnum] = $line ; 
49 |  if ($a =~ /ellcoords:/) { 
50 |   $inscale = pop @Z unless (defined $inscalex) ; 
51 |   $fac2 = critchi($outscale)/critchi($inscale) ;
52 |   $fac =  sqrt($fac2) ;
53 | 
54 |   print YY "$L[0]\n" ; 
55 |   print YY "$L[1]\n" ; 
56 |   print YY "$L[2]\n" ; 
57 |   ($a, $b) = split " ", $L[3] ; $a *=  $fac2; $b *= $fac2 ;  printf YY "%15.9f %15.9f\n", $a, $b ; 
58 |   ($a, $b) = split " ", $L[4] ; $a *=  $fac2; $b *= $fac2 ;  printf YY "%15.9f %15.9f\n", $a, $b ; 
59 |   ($junk, $a, $b, $c, $d, $e) = split " ", $line ;
60 |   print YY "$junk $a $b " ; 
61 |   $c *= $fac ; 
62 |   $d *= $fac ; 
63 |   printf YY "%15.9f ", $c ;
64 |   printf YY "%15.9f ", $d ;
65 |   printf YY "%15.9f ", $e ;
66 |   printf YY ":: %9.3f\n", $outscale ;
67 |   next ; 
68 |  }
69 | }
70 | close YY ;
71 | 
72 | 
73 | sub critchi { 
74 | ## critical value for chisq 2 dof
75 |  my ($scale) = @_ ; 
76 |  local $x ; 
77 |  
78 |  $x = -2*log(1-$scale) ;
79 |  ## printf "zz %12.6f %12.6f\n", $scale, $x ;
80 | 
81 |  return $x ;
82 | 
83 | }
84 | 
85 | sub usage { 
86 |  
87 | print "rescale_ell -i infile -o outfile -s outscale [-a inscale]\n"  ; 
88 | exit 0 ; 
89 | 
90 | }
91 | 


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
 1 | override CFLAGS += -I../include -I/usr/include/openblas
 2 | #LDLIBS += -lgsl -lopenblas -lrt -lm
 3 | override LDLIBS += -lgsl -lopenblas -lm -lpthread
 4 | # Some Linux distributions require separate lapacke library
 5 | # override LDLIBS += -llapacke
 6 | # Mac additions using homebrew installations
 7 | #override CFLAGS += -I/usr/local/opt/openblas/include -I/usr/local/opt/gsl/include
 8 | #override LDFLAGS += -L/usr/local/opt/openblas/lib -L/usr/local/opt/gsl/lib
 9 | # Harvard Medical School O2 cluster additions
10 | ifdef SLURM_CONF
11 | override CFLAGS += -I/n/app/openblas/0.2.19/include -I/n/app/gsl/2.3/include
12 | override LDFLAGS += -L/n/app/openblas/0.2.19/lib -L/n/app/gsl/2.3/lib/
13 | endif
14 | 
15 | ifeq ($(OPTIMIZE), 1)
16 | 	CFLAGS += -O2
17 | endif
18 | 
19 | ifeq ($(DEBUG), 1)
20 | 	CFLAGS += -g # enable debugging
21 | endif
22 | 
23 | ifeq ($(PROFILING), 1)
24 | 	CFLAGS += -pg # enable profiling
25 | endif
26 | 
27 | ND=nicksrc
28 | ED=eigensrc
29 | KD=ksrc
30 | 
31 | NLIB = $(ND)/libnick.a
32 | 
33 | # ----- phony targets
34 | .PHONY: all clean clobber install
35 | 
36 | EXE = baseprog convertf mergeit pca \
37 | 	$(ED)/pcatoy $(ED)/smartrel $(ED)/smarteigenstrat \
38 | 	$(ED)/twstats $(ED)/eigenstrat $(ED)/eigenstratQTL $(ED)/smartpca
39 | 
40 | all:    $(EXE)
41 | 
42 | install:  all
43 | 	mv $(EXE) ../bin
44 | 
45 | clobber:
46 | 	rm -f *.o */*.o */*.a $(ND)/*.o $(ED)/*.o $(KD)/*.o
47 | 	rm -f $(EXE)
48 | 	cd ../bin/ ; rm -f $(notdir $(EXE)) ; cd ../src
49 | 
50 | clean:
51 | 	rm -f *.o core core.* *.o $(ND)/*.o $(ED)/*.o $(KD)/*.o $(EXE)
52 | 
53 | # ----- build nicksrc/libnick.a
54 | $(NLIB):
55 | 	$(MAKE) -C $(ND)
56 | 
57 | baseprog:	baseprog.o mcio.o egsubs.o admutils.o h2d.o $(ED)/exclude.o $(NLIB)
58 | 
59 | convertf:	convertf.o mcio.o egsubs.o admutils.o h2d.o $(ED)/exclude.o $(NLIB)
60 | 
61 | mergeit:	mergeit.o mcio.o admutils.o $(NLIB)
62 | 
63 | pca:	pca.o $(ED)/eigsubs.o eigensrc/eigx.o $(NLIB)
64 | 
65 | $(ED)/pcatoy: 	$(ED)/pcatoy.o eigensrc/eigsubs.o eigensrc/eigx.o $(NLIB)
66 | 
67 | $(ED)/smartrel:	$(ED)/smartrel.o twsubs.o mcio.o qpsubs.o admutils.o egsubs.o regsubs.o \
68 | 		$(ED)/eigsubs.o $(ED)/eigx.o $(ED)/smartsubs.o $(NLIB)    
69 | 
70 | $(ED)/smarteigenstrat:	$(ED)/smarteigenstrat.o mcio.o admutils.o $(NLIB)
71 | 
72 | $(ED)/twstats:	$(ED)/twstats.o $(NLIB) 
73 | 
74 | $(ED)/smartpca:	$(ED)/smartpca.o $(ED)/eigsubs.o $(ED)/exclude.o $(ED)/smartsubs.o $(ED)/eigx.o \
75 | 		mcio.o qpsubs.o admutils.o egsubs.o regsubs.o gval.o \
76 | 		$(NLIB) \
77 |           	$(KD)/kjg_fpca.o $(KD)/kjg_gsl.o 
78 |           	#-lpthread
79 | 


--------------------------------------------------------------------------------
/src/eigensrc/exclude.c:
--------------------------------------------------------------------------------
  1 | #include <exclude.h>
  2 | #include <math.h>
  3 | #include <mcio.h>
  4 | 
  5 | #define MAXRGN 1000
  6 | 
  7 | void
  8 | excluderegions (char *xregionname, SNP ** snps, int nsnps,
  9 | 		char *deletesnpoutname)
 10 | {
 11 |   FILE *fp;
 12 | 
 13 |   int chr[MAXRGN];
 14 |   int lo[MAXRGN];
 15 |   int hi[MAXRGN];
 16 | 
 17 |   char line[MAXSTR];
 18 |   char *spt[MAXFF];
 19 |   int nsplit, nrgn, i, j;
 20 | 
 21 |   if ((fp = fopen (xregionname, "r")) == NULL) {
 22 |     printf ("excluderegions: can't open file %s\n", xregionname);
 23 |     return;
 24 |   }
 25 | 
 26 |   for (i = 0; i < MAXRGN; i++) {
 27 | 
 28 |     if (fgets (line, MAXSTR, fp) == NULL)
 29 |       break;
 30 | 
 31 |     nsplit = splitup (line, spt, MAXFF);
 32 |     if (nsplit != 3)
 33 |       continue;
 34 | 
 35 |     chr[i] = atoi (spt[0]);
 36 |     lo[i] = atoi (spt[1]);
 37 |     hi[i] = atoi (spt[2]);
 38 | 
 39 |   }
 40 |   fclose (fp);
 41 |   nrgn = i;
 42 | 
 43 |   for (i = 0; i < nsnps; i++) {
 44 |     SNP *cupt = snps[i];
 45 |     for (j = 0; j < nrgn; j++) {
 46 |       if (cupt->chrom == chr[j] && cupt->physpos >= lo[j]
 47 | 	  && cupt->physpos <= hi[j]) {
 48 | 	cupt->ignore = YES;
 49 | 	if (deletesnpoutname != NULL) {
 50 | 	  logdeletedsnp (cupt->ID, "xregion", deletesnpoutname);
 51 | 	}
 52 |       }
 53 |     }
 54 |   }
 55 | 
 56 |   return;
 57 | 
 58 | }
 59 | 
 60 | void
 61 | hwfilter (SNP ** snps, int nsnps, int nindiv, double nhwfilter,
 62 | 	  char *deletesnpoutname)
 63 | {
 64 | 
 65 |   int i, k;
 66 | 
 67 |   for (i = 0; i < nsnps; i++) {
 68 |     int num = 0, den = 0, het = 0, n0 = 0, n1 = 0, n2 = 0, nsamples;
 69 |     double p, Q, stdv;
 70 |     SNP *cupt = snps[i];
 71 | 
 72 |     for (k = 0; k < nindiv; k++) {
 73 |       int g = getgtypes (cupt, k);
 74 |       if (g >= 0) {
 75 | 	num += g;
 76 | 	den += 2;
 77 |       }
 78 |       if (g == 1) {
 79 | 	het++;
 80 | 	n1++;
 81 |       }
 82 |       else if (g == 0) {
 83 | 	n0++;
 84 |       }
 85 |       else if (g == 2) {
 86 | 	n2++;
 87 |       }
 88 |     }
 89 | 
 90 |     if ((nsamples = den / 2) == 0)
 91 |       continue;
 92 |     p = (double) num / den;
 93 |     Q = 2 * p * (1 - p);
 94 |     stdv = sqrt (Q * (1 - Q) / nsamples);
 95 |     if (fabs ((double) het / nsamples - Q) > nhwfilter * stdv) {
 96 |       printf ("SNP %s removed by Hardy-Weinberg filter\n", cupt->ID);
 97 |       cupt->ignore = YES;
 98 |       if (deletesnpoutname != NULL) {
 99 | 	logdeletedsnp (cupt->ID, "hwfilt", deletesnpoutname);
100 |       }
101 |     }
102 |   }
103 | 
104 | 
105 | }
106 | 


--------------------------------------------------------------------------------
/bin/evec2pca-ped.perl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | ### translate .evec file to .pca file expected by eigenstrat program
 4 | ### Note: .evec file does not contain entries for outliers
 5 | ###       .pca  file does contain entries (set to all 0.0) for outliers
 6 | 
 7 | # ----- This is a new version for PLINK input files.  It differs from the
 8 | # ----- original in two ways.  (1) The indiv name is in the second column 
 9 | # ----- of the .fam file, but the first of a .ind file.  (2) If the 
10 | # ----- indiv names are not found in the .evec file, try the 
11 | # ----- familyname:indivname combination.  
12 | 
13 | $k = $ARGV[0];
14 | $evec = $ARGV[1]; 
15 | $ind = $ARGV[2];  
16 | $pca = $ARGV[3];  
17 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading");
18 | open(PCA,">$pca") || die("OOPS couldn't open file $pca for writing");
19 | 
20 | print PCA ("$k\n"); # number of output eigenvectors/eigenvalues
21 | $line = <EVEC>; chomp($line); # eigvals line
22 | my @array = split(/[\t ]+/,$line);
23 | for($x=0; $x<$k; $x++) { printf PCA ("%.04f\n",$array[$x+2]); } # x-th eval
24 | while($line = <EVEC>)
25 | {
26 |   chomp($line);
27 |   $line = " " . $line;
28 |   my @array = split(/[\t ]+/,$line);
29 |   $l = @array; 
30 |   unless($l == 3+$k) { die("OOPS #evec in $evec is different from $k"); }
31 |   $sample = $array[1];
32 |   for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = $array[$x+2]; }
33 |   $found{$sample} = 1;
34 | }
35 | 
36 | # ----- Figure out which name convention to use
37 | my $count1 = 0;
38 | my $count2 = 0;
39 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading");
40 | while ( my $line = <IND> )  {
41 |   chomp($line);
42 |   $line =~ s/^[\s]+//;     # remove leading white-space
43 |   my @E = split(/[\s]+/,$line);
44 | 
45 |   my $s = $E[1];
46 |   my $t = $E[0] . ":" . $E[1];
47 | 
48 |   if ( exists $found{$s} )  {
49 |     $count1++;
50 |   }
51 |   if ( exists $found{$t} )  {
52 |     $count2++;
53 |   }
54 | }
55 | close(IND);
56 | 
57 | open(IND,$ind) || die("OOPS couldn't open indiv file $ind for reading");
58 | while($line = <IND>)
59 | {
60 |   chomp($line);
61 |   $line =~ s/^[\s]+//;
62 |   my @array = split(/[\s]+/,$line);
63 |   $sample = ($count1 >= $count2 ? $array[1] : $array[0] . ":" . $array[1]);
64 |   if($sample eq "") { $sample = $array[1]; }
65 |   unless($found{$sample})
66 |   {
67 |     for($x=0; $x<$k; $x++) { $evecarray{$sample}[$x] = 0.0; }
68 |   }
69 |   for($x=0; $x<$k; $x++)
70 |   {
71 |     printf PCA (" ");
72 |     if($evecarray{$sample}[$x] > 0) { printf PCA (" "); }
73 |     printf PCA ("%.04f",$evecarray{$sample}[$x]);
74 |   }
75 |   printf PCA ("\n");
76 | }
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/include/ranmath.h:
--------------------------------------------------------------------------------
 1 | #include<math.h>
 2 | #include<stdlib.h>
 3 | 
 4 | 
 5 | #include <limits.h>
 6 | 
 7 | #define BIGINT INT_MAX   
 8 | #define SRAND  srandom
 9 | #define LRAND  random
10 | #define DRAND() ( (double) (random() % BIGINT) / (double) (BIGINT)) 
11 | #define DRAND2() ( drand2() ) 
12 | /* random must return random integer in range 0 to BIGINT-1  */
13 | 
14 | 
15 | #define NORMAL gauss
16 | 
17 | double  gauss()  ;  // standard normal
18 | void    gaussa(double *a, int n)  ;  // array of standard normals
19 | double  gds(double a)  ;  // obsolete
20 | double  poidev(double mean) ;  // obsolete
21 | double  ranpoiss(double mean) ;  // poisson Note double
22 | double  ranpoissx(double mean) ;  // poisson | > 0
23 | void  ranperm(int *a, int n) ;   // randomly permute a; if random permulation wanted : idperm(a,n) ; ranperm(a,n) 
24 | double ranexp( void) ;  // exponential mean 1
25 | double rangam(double a) ;  // standard gamma mean a
26 | int randis(double *a, int n) ;  // element from discrete distribution a
27 | void ransamp(int *samp, int nsamp, double *p, int plen) ; // sample nsamp samples from p
28 | void pick2(int n, int *k1, int *k2) ;  // pick 2 elements from 0..n-1
29 | int ranmod(int n)  ;  // random mod n 
30 | int iranpick(int lo, int hi) ;  // random int in [lo, hi] 
31 | double ranbeta(double a, double b) ;  //  beta 
32 | int ranbinom(int n, double p) ;   // binomial  
33 | void setrand(double *vv, int n) ;  // filll vv with U[0,1]
34 | int ewens(int *a, int n, double theta) ;  // ewens sampling formula  
35 | void genmultgauss(double *rvec, int num, int n, double *covar) ;  // multivariate 
36 | double drand2() ;  
37 | void ranmultinom(int *samp, int n, double *p, int len)  ;  // multinomial
38 | double ranchi (int d)  ;   // chisq d dof.
39 | void raninvwis(double *wis, int t, int d, double *s)  ;  // inverse wishart
40 | double uniform(double lo, double hi) ;   // uniform (lo..hi)
41 | void ransimplex(double *x, int n) ;   // uniform on n-simplex  
42 | void randirichlet(double *x, double *pp, int n)  ;  // dirichlet parameter vector pp
43 | void randirmult(double *pp, int *aa, int len, int m) ;  // dirichlet multinomial.  Output aa
44 | int prob1(double p) ; // return YES with probability p 
45 | double rant(double df) ;  // t distribution  
46 | double samppow(double e, double a, double b) ;
47 | double rejnorm(double lo, double hi) ;       // usually call ranboundnorm 
48 | double ranboundnorm(double lo, double hi) ;  // sample standard normal in [lo, hi] 
49 | double rtrunc2(double T) ;  // sample standard normal > T Rayleigh rejection         
50 | double rantruncnorm(double T, int upper) ;  // sample standard normal > T (upper =1) < T (upper = 0) 
51 | int ranhprob(int n, int a, int m) ;    // n balls a block sample m .  Fow many black
52 | double rangeom (double theta) ;  // Geometric distribution, mean 1/theta
53 | 


--------------------------------------------------------------------------------
/POPGEN/smartpca.info:
--------------------------------------------------------------------------------
 1 | This file contains documentation of standard output printed by smartpca.  For
 2 | general documentation of the smartpca program, see README in this directory.
 3 | 
 4 | Parameter values: smartpca prints basic info on parameter values used.
 5 | 
 6 | Outliers: smartpca prints a list of outlier individuals removed, if any.
 7 |  Especially in population genetic studies you may not wish for 
 8 | outliers to be removed.  Code
 9 | numoutliter:  0   
10 | 
11 | Tracy-Widom statistics: the column of interest is the "p-value" column which
12 |   indicates the statistical significance of each principal component.
13 |   To get Tracy-Widom statistics, you must recompile smartpca in your 
14 |   local src/ directory (and move it to bin/), 
15 |   or just run twstats (see README file in POPGEN directory).
16 | 
17 | eigbestsnp:  the SNP of maximum weight.  SNP weights are proportional to 
18 |   the correlation (across samples) between each SNP and each PC.  
19 |   Equivalently, PC coordinates of a given sample can be computed as the
20 |   weighted sum of normalized SNP genotypes.
21 | 
22 | ----------------------------------------------------------------------------
23 | 
24 | POPULATION GENETIC STATISTICS (relevant to studying relationships between
25 | populations whose labels are explicitly specified in input indiv file) --
26 | 
27 | Average divergence between populations: smartpca prints a divergence matrix
28 |   describing divergence between each pair of populations.  Details:
29 |   From the covariance matrix X  whose eigenvectors were computed
30 |   we can compute a "distance" d for each pair of individuals (i,j):
31 |   d(i,j) = X(i,i) + X(j,j) - 2X(i,j)
32 |   For each pair of populations (a,b)
33 |   now define
34 |   D(a,b) = \sum d(i,j) (in pop a, in pop b) / (| popa | * | pop b| )
35 |   an average distance.  We then normalize D so that the diagonal has
36 |   mean 1 and publish D.
37 |   
38 | Fst statistics: prints fst estimate between each pair of populations, 
39 |   along with standard error of the estimate.  
40 |   [If there is only 1 population, no fst statistics are printed.]
41 |   [If phylipoutname parameter is specified, this information is instead 
42 |   printed to an output file in PHYLIP format.  See ./README for details.]
43 | 
44 | Anova statistics for population differences along each eigenvector:
45 |   For each eigenvector, a P-value for statistical significance of differences
46 |     between each pair of populations along that eigenvector is printed.
47 |     +++ is used to highlight P-values less than 1e-06.
48 |     *** is used to highlight P-values between 1e-06 and 1e-03.
49 |     If there are more than 2 populations, an overall P-value is also printed
50 |       for that eigenvector.
51 |     If there are more than 2 populations, the populations with minimum (minv)
52 |       and maximum (maxv) eigenvector coordinate are also printed.
53 |   [If there is only 1 population, no Anova statistics are printed.]
54 | 
55 | Statistical significance of differences between populations:
56 |   For each pair of populations, the above Anova statistics are summed
57 |   across eigenvectors.  The result is approximately chisq with
58 |   d.o.f. equal to the number of eigenvectors.  The chisq statistic and
59 |   its p-value are printed.
60 |   [If there is only 1 population, no statistics are printed.]
61 | 
62 | -----------------------------------------------------------------------
63 | 
64 | Questions? nickp@broadinstitute.org
65 | 


--------------------------------------------------------------------------------
/src/eigensrc/twstats.c:
--------------------------------------------------------------------------------
  1 | #include  <stdio.h>
  2 | #include <string.h>
  3 | #include <math.h>
  4 | 
  5 | #include <nicklib.h>
  6 | #include <getpars.h>
  7 | 
  8 | int verbose = NO;
  9 | double nval = -1;
 10 | 
 11 | char *iname = NULL;
 12 | char *parname = NULL;
 13 | char *oname = NULL;
 14 | 
 15 | char *twxtab = NULL;
 16 | 
 17 | void readcommands (int argc, char **argv);
 18 | 
 19 | #define VERSION    "1000"
 20 | 
 21 | int minleneig = 10;
 22 | 
 23 | 
 24 | int
 25 | main (int argc, char **argv)
 26 | {
 27 |   FILE *ofile;
 28 |   int nlambda = 0;
 29 |   int i, m;
 30 |   double zn, zvar, tw, tail;
 31 |   double *xx[0], *lambda;
 32 | 
 33 |   readcommands (argc, argv);
 34 |   settwxtable (twxtab);
 35 | 
 36 |   if (oname == NULL)
 37 |     ofile = stdout;
 38 |   else
 39 |     openit (oname, &ofile, "w");
 40 | 
 41 |   if (iname == NULL)
 42 |     fatalx ("i paraameter compulsory\n");
 43 |   nlambda = numlines (iname);
 44 |   ZALLOC (lambda, nlambda, double);
 45 |   xx[0] = lambda;
 46 |   nlambda = getxx (xx, nlambda, 1, iname);
 47 |   vst (lambda, lambda, -1.0, nlambda);
 48 |   sortit (lambda, NULL, nlambda);
 49 |   vst (lambda, lambda, -1.0, nlambda);
 50 |   m = numgtz (lambda, nlambda);
 51 | 
 52 |   fprintf (ofile, "%4s  %12s", "#N", "eigenvalue");
 53 |   fprintf (ofile, "%12s", "difference");
 54 |   fprintf (ofile, " %9s %12s", "twstat", "p-value");
 55 |   fprintf (ofile, " %9s", "effect. n");
 56 |   fprintf (ofile, "\n");
 57 | 
 58 |   for (i = 0; i < m; ++i) {
 59 | 
 60 |     zn = nval;
 61 |     tail = dotwcalc (lambda + i, m - i, &tw, &zn, &zvar, minleneig);
 62 |     fprintf (ofile, "%4d  %12.6f", i + 1, lambda[i]);
 63 |     if (i == 0)
 64 |       fprintf (ofile, "%12s", "NA");
 65 |     else
 66 |       fprintf (ofile, "%12.6f", lambda[i] - lambda[i - 1]);
 67 |     if (tail >= 0.0)
 68 |       fprintf (ofile, " %9.3f %12.6g", tw, tail);
 69 |     else
 70 |       fprintf (ofile, " %9s %12s", "NA", "NA");
 71 |     if (zn > 0.0) {
 72 |       fprintf (ofile, " %9.3f", zn);
 73 |     }
 74 |     else {
 75 |       fprintf (ofile, " %9s", "NA");
 76 |     }
 77 |     fprintf (ofile, "\n");
 78 |   }
 79 |   return 0;
 80 | }
 81 | 
 82 | void
 83 | readcommands (int argc, char **argv)
 84 | {
 85 | 
 86 |   int i;
 87 |   char *parname = NULL;
 88 |   phandle *ph;
 89 | 
 90 |   while ((i = getopt (argc, argv, "i:o:p:n:m:t:V")) != -1) {
 91 | 
 92 |     switch (i) {
 93 | 
 94 |     case 'i':
 95 |       iname = strdup (optarg);
 96 |       break;
 97 | 
 98 |     case 'o':
 99 |       oname = strdup (optarg);
100 |       break;
101 | 
102 |     case 't':
103 |       twxtab = strdup (optarg);
104 |       break;
105 | 
106 |     case 'n':
107 |       nval = atof (optarg);
108 |       break;
109 | 
110 |     case 'm':
111 |       minleneig = atoi (optarg);
112 |       break;
113 | 
114 |     case 'p':
115 |       parname = strdup (optarg);
116 |       break;
117 | 
118 |     case 'V':
119 |       verbose = YES;
120 |       break;
121 | 
122 |     case '?':
123 |       printf ("Usage: bad params.... \n");
124 |       fatalx ("bad params\n");
125 |     }
126 |   }
127 | 
128 |   if (parname == NULL)
129 |     return;
130 | 
131 |   printf ("parameter file: %s\n", parname);
132 |   ph = openpars (parname);
133 | 
134 |   getstring (ph, "input:", &iname);
135 |   getstring (ph, "output:", &oname);
136 |   getdbl (ph, "nval:", &nval);
137 |   getint (ph, "minleneig:", &minleneig);
138 | 
139 |   writepars (ph);
140 |   closepars (ph);
141 | 
142 | }
143 | 


--------------------------------------------------------------------------------
/include/kjg_gsl.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @file kjg_gsl.h
  3 |  * @brief Augment GSL functions
  4 |  */
  5 | 
  6 | #ifndef KJG_GSL_H_
  7 | #define KJG_GSL_H_
  8 | 
  9 | #include <gsl/gsl_matrix.h>
 10 | #include <gsl/gsl_vector.h>
 11 | #include <gsl/gsl_rng.h>
 12 | 
 13 | /**
 14 |  * Prints the matrix tab-delimited
 15 |  * @param *stream output file pointer
 16 |  * @param *m gsl_matrix to print
 17 |  * @param *template character template for fprintf
 18 |  */
 19 | 
 20 | void kjg_gsl_matrix_fprintf (FILE * stream, gsl_matrix * m,
 21 | 			     const char *template);
 22 | 
 23 | /**
 24 |  * Prints the eigenvalues and then eigenvectors below
 25 |  * @param *stream output file pointer
 26 |  * @param *eval eigenvalues
 27 |  * @param *evec eigenvectors
 28 |  * @param *template character template for fprintf */
 29 | 
 30 | void kjg_gsl_evec_fprintf (FILE * stream,
 31 | 			   gsl_vector * eval,
 32 | 			   gsl_matrix * evec, const char *template);
 33 | 
 34 | /**
 35 |  * Reads a matrix
 36 |  * @param *stream input file pointer
 37 |  * @param *m matrix to store
 38 |  */
 39 | 
 40 | void kjg_gsl_matrix_fscanf (FILE * stream, gsl_matrix * m);
 41 | 
 42 | /**
 43 |  * Reads an evec
 44 |  * @param *stream input file pointer
 45 |  * @param *eval eigenvalues vector
 46 |  * @param *evec eigenvectors matrix
 47 |  */
 48 | 
 49 | int kjg_gsl_evec_fscanf (FILE * stream, gsl_vector * eval, gsl_matrix * evec);
 50 | 
 51 | /**
 52 |  * Initializes random number generation.
 53 |  */
 54 | 
 55 | gsl_rng *kjg_gsl_rng_init ();
 56 | 
 57 | /**
 58 |  * Initializes the matrix with random unit gaussians
 59 |  * @param *m matrix to be set
 60 |  * @param *r random number generator
 61 |  */
 62 | 
 63 | void kjg_gsl_ran_ugaussian_pair (const gsl_rng * r, double x[2]);
 64 | 
 65 | /** Fills a matrix with unit Gaussian random variates
 66 |  * @param *r random number generator
 67 |  * @param *m matrix to be filled
 68 |  */
 69 | 
 70 | void kjg_gsl_ran_ugaussian_matrix (const gsl_rng * r, gsl_matrix * m);
 71 | 
 72 | /**
 73 |  * Normalizes the matrix so the Frobenius norm is M*N
 74 |  * @param *m matrix to normalize
 75 |  * @return if error
 76 |  */
 77 | 
 78 | int kjg_gsl_matrix_frobenius_normalize (gsl_matrix * m);
 79 | 
 80 | /**
 81 |  * Calculates the norm of a matrix
 82 |  * @param norm type of norm to return, see lapack dlange
 83 |  * @param *m matrix to find norm of
 84 |  * @return norm
 85 |  */
 86 | 
 87 | double kjg_gsl_dlange (const char norm, const gsl_matrix * m);
 88 | 
 89 | /**
 90 |  * Performs the QR decomposition on the matrix and return Q in the matrix
 91 |  * @param *m matrix to orthogonalize
 92 |  */
 93 | 
 94 | void kjg_gsl_matrix_QR (gsl_matrix * m);
 95 | 
 96 | /**
 97 |  * Calls LAPACK dgeqrf and return R and compacted Q matrix
 98 |  * @param *m input matrix
 99 |  * @param *tau see LAPACK documentation
100 |  * @return LAPACK return
101 |  */
102 | 
103 | int kjg_gsl_dgeqrf (gsl_matrix * m, gsl_vector * tau);
104 | 
105 | /**
106 |  * Calls LAPACK dorgqr to extract Q matrix
107 |  * @param *m matrix with compacted Q and will store unpacked Q
108 |  * @param *tau see LAPACK documentation
109 |  * @return LAPACK return
110 |  */
111 | int kjg_gsl_dorgqr (gsl_matrix * m, gsl_vector * tau);
112 | 
113 | /**
114 |  * Calls LAPACK dgesvd, keeping u (in m) and s, discarding v^T
115 |  * @param *m input matrix / where u is stored
116 |  * @param *s entries of the diagonal matrix
117 |  * @return LAPACK return
118 |  */
119 | int kjg_gsl_SVD (gsl_matrix * M, gsl_matrix * V, gsl_vector * S);
120 | 
121 | #endif /* KJG_GSL_H_ */
122 | 


--------------------------------------------------------------------------------
/src/egsubs.c:
--------------------------------------------------------------------------------
  1 | #include  "mcio.h"
  2 | #include  "egsubs.h"
  3 | 
  4 | 
  5 | int
  6 | makeeglist (char **eglist, int maxnumeg, Indiv ** indivmarkers, int numindivs)
  7 | // old routine mkeglist  
  8 | {
  9 | 
 10 |   Indiv *indx;
 11 |   int i, k, numeg = 0;
 12 |   for (i = 0; i < numindivs; i++) {
 13 |     indx = indivmarkers[i];
 14 |     if (indx->ignore)
 15 |       continue;
 16 |     k = indxindex (eglist, numeg, indx->egroup);
 17 |     if (k < 0) {
 18 |       if (numeg >= maxnumeg) {
 19 |         printf
 20 |           ("number of populations too large.  Increase maxpops if you wish\n");
 21 |         fatalx
 22 |           ("(makeeglist) You really want to analyse more than %d populations?\n",
 23 |            maxnumeg);
 24 |       }
 25 |       eglist[numeg] = strdup (indx->egroup);
 26 |       ++numeg;
 27 |     }
 28 |   }
 29 |   return numeg;
 30 | }
 31 | 
 32 | int
 33 | mkeglist (Indiv ** indm, int numindivs, char **eglist)
 34 | {
 35 |   Indiv *indx;
 36 |   int i, k, numeg = 0;
 37 |   for (i = 0; i < numindivs; i++) {
 38 |     indx = indm[i];
 39 |     if (indx->ignore)
 40 |       continue;
 41 |     k = indxindex (eglist, numeg, indx->egroup);
 42 |     if (k < 0) {
 43 |       eglist[numeg] = strdup (indx->egroup);
 44 |       ++numeg;
 45 |     }
 46 |   }
 47 |   return numeg;
 48 | }
 49 | 
 50 | int
 51 | loadlist_type (char **list, char *listname, int *ztypes, int off)
 52 | // listname is just a list of names ... 
 53 | {
 54 |   FILE *lfile;
 55 |   char line[MAXSTR];
 56 |   char *spt[MAXFF];
 57 |   char *sx;
 58 |   Indiv *indx;
 59 |   int nsplit, i, n = 0, tt;
 60 | 
 61 |   if (listname == NULL)
 62 |     return 0;
 63 |   openit (listname, &lfile, "r");
 64 |   while (fgets (line, MAXSTR, lfile) != NULL) {
 65 |     nsplit = splitup (line, spt, MAXFF);
 66 |     if (nsplit == 0)
 67 |       continue;
 68 |     sx = spt[0];
 69 |     if (sx[0] == '#') {
 70 |       freeup (spt, nsplit);
 71 |       continue;
 72 |     }
 73 |     if (nsplit < 2)
 74 |       fatalx ("bad listname: %s\n", sx);
 75 |     list[n] = strdup (sx);
 76 |     tt = atoi (spt[1]);
 77 |     ztypes[n] = tt + off;
 78 |     ++n;
 79 |     freeup (spt, nsplit);
 80 |   }
 81 |   return n;
 82 | }
 83 | 
 84 | 
 85 | void
 86 | seteglist (Indiv ** indm, int nindiv, char *eglistname)
 87 | {
 88 |   FILE *egfile;
 89 |   char line[MAXSTR];
 90 |   char *spt[MAXFF];
 91 |   char *sx;
 92 |   Indiv *indx;
 93 |   int nsplit, i;
 94 | 
 95 |   if (eglistname == NULL)
 96 |     return;
 97 |   openit (eglistname, &egfile, "r");
 98 |   while (fgets (line, MAXSTR, egfile) != NULL) {
 99 |     nsplit = splitup (line, spt, MAXFF);
100 |     if (nsplit == 0)
101 |       continue;
102 |     sx = spt[0];
103 |     if (sx[0] == '#')
104 |       continue;
105 |     setstatus (indm, nindiv, sx);
106 |     freeup (spt, nsplit);
107 |   }
108 |   fclose (egfile);
109 | }
110 | 
111 | void
112 | seteglistv (Indiv ** indm, int nindiv, char *eglistname, int val)
113 | {
114 |   FILE *egfile;
115 |   char line[MAXSTR];
116 |   char *spt[MAXFF];
117 |   char *sx = NULL;
118 |   Indiv *indx;
119 |   int nsplit, i;
120 | 
121 |   if (eglistname == NULL) {
122 |     setstatusv (indm, nindiv, NULL, val);
123 |   }
124 | 
125 |   openit (eglistname, &egfile, "r");
126 |   while (fgets (line, MAXSTR, egfile) != NULL) {
127 |     nsplit = splitup (line, spt, MAXFF);
128 |     if (nsplit == 0)
129 |       continue;
130 |     sx = spt[0];
131 |     if (sx[0] == '#')
132 |       continue;
133 |     setstatusv (indm, nindiv, sx, val);
134 |     freeup (spt, nsplit);
135 |   }
136 |   fclose (egfile);
137 | }
138 | 


--------------------------------------------------------------------------------
/POPGEN/elldemo/sicaa.evec:
--------------------------------------------------------------------------------
 1 |            #eigvals:     1.774     1.458 
 2 |              GEO-002     0.124960     -0.019309         Georgian
 3 |              GEO-005     0.098987     -0.025584         Georgian
 4 |              GEO-010     0.124701     -0.043397         Georgian
 5 |              GEO-015     0.131265     -0.027730         Georgian
 6 |              GEO-020     0.135953     -0.053088         Georgian
 7 |              GEO-028     0.134414     -0.036569         Georgian
 8 |              GEO-031     0.157799     -0.025231         Georgian
 9 |              GEO-032     0.146574     -0.026196         Georgian
10 |              GEO-039     0.158120     -0.022020         Georgian
11 |              GEO-051     0.144490     -0.042876         Georgian
12 |              GEO-061     0.153842     -0.030906         Georgian
13 |              GEO-082     0.158142     -0.028626         Georgian
14 |               GEO001     0.156173     -0.043278         Georgian
15 |            HGDP00666    -0.116760     -0.030823        Sardinian
16 |            HGDP00667    -0.116709     -0.017192        Sardinian
17 |            HGDP00668    -0.137047     -0.027317        Sardinian
18 |            HGDP00669    -0.126519     -0.024759        Sardinian
19 |            HGDP00670    -0.135795     -0.031889        Sardinian
20 |            HGDP00671    -0.128935     -0.029534        Sardinian
21 |            HGDP00672    -0.124405     -0.032339        Sardinian
22 |            HGDP00673    -0.120324     -0.035232        Sardinian
23 |            HGDP00674    -0.137120     -0.026784        Sardinian
24 |            HGDP01062    -0.119201     -0.035362        Sardinian
25 |            HGDP01063    -0.141085     -0.036116        Sardinian
26 |            HGDP01064    -0.153843     -0.049779        Sardinian
27 |            HGDP01065    -0.145904     -0.024355        Sardinian
28 |            HGDP01066    -0.157103     -0.027543        Sardinian
29 |            HGDP01067    -0.144155     -0.050578        Sardinian
30 |            HGDP01068    -0.130147     -0.039257        Sardinian
31 |            HGDP01069    -0.140198     -0.030983        Sardinian
32 |            HGDP01070    -0.138745     -0.032343        Sardinian
33 |            HGDP01071    -0.116760     -0.041106        Sardinian
34 |            HGDP01072    -0.127844     -0.040682        Sardinian
35 |            HGDP01073    -0.143137     -0.035532        Sardinian
36 |            HGDP01074    -0.138522     -0.032142        Sardinian
37 |            HGDP01075    -0.145153     -0.023029        Sardinian
38 |            HGDP01076    -0.136974     -0.040912        Sardinian
39 |            HGDP01077    -0.119744     -0.032696        Sardinian
40 |            HGDP01078    -0.124987     -0.031961        Sardinian
41 |            HGDP01079    -0.128279     -0.021665        Sardinian
42 |         LithuanianF1     0.030379      0.322035       Lithuanian
43 |           lithuania3     0.031552      0.282867       Lithuanian
44 |          lithuania10     0.023618      0.342455       Lithuanian
45 |           lithuania9     0.032949      0.338347       Lithuanian
46 |         LithuanianA1     0.023348      0.316050       Lithuanian
47 |         LithuanianE2     0.031801      0.273176       Lithuanian
48 |           lithuania1     0.015194      0.254946       Lithuanian
49 |           lithuania8     0.030478      0.305058       Lithuanian
50 |           lithuania2     0.032994      0.334543       Lithuanian
51 |         LithuanianD1     0.016299      0.282471       Lithuanian
52 |                 mg43     0.159324     -0.022091         Georgian
53 |                 mg47     0.140898     -0.038999         Georgian
54 |                 mg22     0.138969     -0.035963         Georgian
55 |                 mg49     0.153820     -0.036044         Georgian
56 |                 mg23     0.140328     -0.032225         Georgian
57 |                 mg62     0.158555     -0.026727         Georgian
58 |                 mg27     0.154282     -0.028294         Georgian
59 |                 mg31     0.141187     -0.024043         Georgian
60 |                 mg34     0.149388     -0.030698         Georgian
61 |                 mg40     0.162562     -0.032568         Georgian
62 |                I3122    -0.094106     -0.019738       Sicily_EBA
63 |               I11442    -0.061346     -0.019553       Sicily_EBA
64 |                I7796    -0.046874     -0.045908       Sicily_EBA
65 |                I7800    -0.066847     -0.032000       Sicily_EBA
66 |                I7807    -0.073119     -0.032957       Sicily_EBA
67 | 


--------------------------------------------------------------------------------
/include/strsubs.h:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | 
  3 | int splitup (char *strin,  char *strpt[],int maxpt) ;
  4 | int splitupx(char *strin, char **spt, int maxpt, char splitc)  ;
  5 | int splitupwxbuff(char *strin, char **spt, int maxpt, char *bigbuff, int bigbufflen)  ;
  6 | int splitupxbuff(char *strin, char **spt, int maxpt, char splitc, char *bigbuff, int bigbufflen)  ;
  7 | int oldsplitup (char *strin,  char *strpt[],int maxpt) ;
  8 | void freeup (char *strpt[],int numpt) ;
  9 | int split1 (char *strin,  char *strpt[], char splitc);
 10 | int first_word(char *string, char *word, char *rest) ;
 11 | char *fnwhite (char *ss) ;
 12 | char *fwhite (char *ss) ;
 13 | char *ftab (char *ss) ;
 14 | int NPisnumber (char c) ;
 15 | int isnumword (char *str)  ;
 16 | void ffprint (FILE *fff, char *fmt, ...) ; 
 17 | void enuf( char *fmt, ...) ;
 18 | void fatalx( char *fmt, ...) ;
 19 | int docommand( char *fmt, ...) ;
 20 | long seednum() ;
 21 | void printbl(int n) ;
 22 | void printnl() ;
 23 | void striplead(char *sss, char c) ;
 24 | void striptrail(char *sss, char c) ;
 25 | void catx(char *sout, char **spt, int n) ;
 26 | void catxx(char *sout, char **spt, int n) ;
 27 | void catxc(char *sout, char **spt, int n, char c) ;
 28 | void makedfn(char *dirname, char *fname, char *outname, int maxstr) ;
 29 | int substring (char **ap, char *inx, char *outx) ;
 30 | int mapstrings(char **pstr, char **insub, char **outsub, int n)  ;
 31 | int upstring (char *ss)  ; 
 32 | int numcols (char *name) ;
 33 | int numlines(char *name) ;
 34 | int openit_trap (char *name, FILE ** fff, char *type); 
 35 | void openit(char *name, FILE **fff, char *type)  ;
 36 | int  ftest(char *aname) ;
 37 | void fcheckr(char *name) ;
 38 | void fcheckw(char *name) ; 
 39 | int getxx(double **xx, int maxrow, int numcol, char *fname) ;
 40 | int getss(char  **ss, char *fname) ;
 41 | int  loadlist(char **list, char *listname)    ;  // with dup check
 42 | void printdups(char **list, int n)  ;
 43 | int checkdup(char **list, int n)  ;
 44 | double clocktime() ;  // cpu time in seconds
 45 | void crevcomp(char *sout, char *sin)  ;  
 46 | int indxstring(char **namelist, int len, char *strid)  ;
 47 | int indxstringr(char **namelist, int len, char *strid)  ;
 48 | char *strstrx(char *s1, char *s2)  ;  // case insensitive strstr
 49 | int getxxnames(char ***pnames, double **xx, int maxrow, int numcol, char *fname);
 50 | int getjjnames(char ***pnames, int **xx, int maxrow, int numcol, char *fname);
 51 | int getxxnamesf(char ***pnames, double **xx, int maxrow, int numcol, FILE *fff) ;
 52 | int getnameslohi(char ****pnames, int maxrow, int numcol, char *fname, int lo, int hi) ;
 53 | int getnamesstripcolon(char ****pnames, int maxrow, int numcol, char *fname, int lo, int hi) ;
 54 | int getnames(char ****pnames, int maxrow, int numcol, char *fname) ;
 55 | char num2iub (int num) ; 
 56 | char revchar(char c) ;
 57 | int  iub2num(char c) ;
 58 | char num2base (int num) ; 
 59 | int base2num(char c) ;
 60 | char *int_string(int a, int len, int base) ;
 61 | char *binary_string(int a, int len) ;
 62 | int string_binary(char *sx) ;
 63 | void freestring (char **ss) ;
 64 | void copystrings(char **sa, char **sb, int n) ;
 65 | void printstringsw(char **ss, int n, int slen, int width)  ;
 66 | void printstrings(char **ss, int n)  ;
 67 | void printstringsx(char **ss, int n)  ;
 68 | int ridfile(char *fname) ; 
 69 | char compbase(char x) ;
 70 | void mkupper(char *sx) ;
 71 | void mklower(char *sx) ;
 72 | int iubdekode(char *a, char iub) ;  
 73 | int isiub(char iub) ;  
 74 | int isiub2(char iub) ;  
 75 | int iubcbases(char *cbases, char iub) ;
 76 | int ishet(char c) ;
 77 | int cttype(char cc) ;
 78 | int char2int(char cc) ;
 79 | char int2char(int x) ;
 80 | void chomp(char *str) ;
 81 | 
 82 | int numcmatch(char *cc, int len, char c) ;
 83 | int numcnomatch(char *cc, int len, char c) ;
 84 | char *strnotchar(char *s, char c) ;
 85 | char *findupper(char *s) ;
 86 | char *fgetstrap(char *buff, int maxlen, FILE *fff, int *ret)  ;
 87 | char readtonl(FILE *fff) ; 
 88 | int  filehash(char *name) ;
 89 | char *mytemp (char *qqq) ; 
 90 | void printslurmenv ()  ; 
 91 | int getfline(char *ss, char *fname, int maxstr) ;
 92 | int copyfs(char *infile, FILE *fff)  ;
 93 | int getxxq(double **xx, int maxrow, int numcol, char *fname) ; 
 94 | int numcolsq (char *name) ;
 95 | int getdata(char *buff, int nbytes, char *fname)  ;
 96 | int putdata(char *buff, int nbytes, char *fname)  ;
 97 | void writestrings(char *fname, char **ss, int n)  ;
 98 | 
 99 | 
100 | #define ZALLOC(item,n,type)      if ((item = (type *)calloc((n),sizeof(type))) == NULL) \
101 |                                         fatalx("Unable to allocate %ld unit(s) for item \n", (long) n)
102 | 
103 | #undef MAX
104 | #undef MIN
105 | 
106 | #define MAX(a,b)   ( (a) < (b) ?  (b) : (a) ) 
107 | #define MIN(a,b)   ( (a) < (b) ?  (a) : (b) ) 
108 | #define YES  1
109 | #define NO   0
110 | #define TRUE   1
111 | #define FALSE  0
112 | #define CNULL  '\0' 
113 | #define CNL  '\n' 
114 | #define CTAB  '\t' 
115 | 
116 | 


--------------------------------------------------------------------------------
/src/h2d.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <unistd.h>
  4 | #include <math.h>
  5 | 
  6 | #include <nicklib.h>
  7 | #include <admutils.h>
  8 | 
  9 | extern int verbose;
 10 | extern long rlen, packlen ; 
 11 | extern char *packgenos ; 
 12 | 
 13 | static char *pack2 ;   
 14 | 
 15 | static int pseudodip = YES ;  // for hap to dip.  
 16 | 
 17 | int
 18 | mkindh2d (Indiv ** indivmarkers, Indiv *** pindm2, int numindivs)
 19 | {
 20 |   char ss[50];
 21 |   Indiv *indx, **indm2, *indp;
 22 |   int n, len, k;
 23 |   int numind2;
 24 | 
 25 |   numind2 = numindivs / 2;
 26 |   ZALLOC (*pindm2, numind2, Indiv *);
 27 |   indm2 = *pindm2;
 28 |   n = 0;
 29 |   for (k = 0; k < numindivs; k++) {
 30 |     indx = indivmarkers[k];
 31 |     strcpy (ss, indx->ID);
 32 |     len = strlen (ss);
 33 |     if (ss[len - 1] != 'A')
 34 |       continue;
 35 |     ss[len - 2] = CNULL;
 36 |     ZALLOC (indm2[n], 1, Indiv);
 37 |     indp = indm2[n];
 38 |     *indp = *indx;
 39 |     strcpy (indp->ID, ss);
 40 |     ++n;
 41 |   }
 42 |   if (n != numind2)
 43 |     fatalx ("(mkindh2d) bug\n");
 44 |   return n;
 45 | }
 46 | 
 47 | void
 48 | remaph2d (SNP ** snpmarkers, int numsnps, Indiv ** indivmarkers,
 49 | 	  Indiv ** indm2, int numindivs, int numind2)
 50 | {
 51 | 
 52 |   int *g1, *g2;
 53 |   int *x1, *x2;
 54 |   int *tind, tt, t, i, j, k, j1, j2;
 55 |   Indiv *indx;
 56 |   SNP *cupt;
 57 |   char s1[50], s2[50];
 58 | 
 59 |   ZALLOC (g2, numind2, int);
 60 |   ZALLOC (g1, numindivs, int);
 61 |   ZALLOC (x1, numindivs, int);
 62 |   ZALLOC (x2, numindivs, int);
 63 | 
 64 |   for (k = 0; k < numind2; ++k) {
 65 |     indx = indm2[k];
 66 |     sprintf (s1, "%s:A", indx->ID);
 67 |     sprintf (s2, "%s:B", indx->ID);
 68 |     t = x1[k] = indindex (indivmarkers, numindivs, s1);
 69 |     if (t < 0) {
 70 |       sprintf (s1, "%s_A", indx->ID);
 71 |       sprintf (s2, "%s_B", indx->ID);
 72 |       t = x1[k] = indindex (indivmarkers, numindivs, s1);
 73 |     }
 74 |     if (t < 0)
 75 |       fatalx ("bad newindiv: %s\n", indx->ID);
 76 |     t = x2[k] = indindex (indivmarkers, numindivs, s2);
 77 |     if (t < 0)
 78 |       fatalx ("bad newindiv: %s\n", indx->ID);
 79 |   }
 80 | 
 81 |   for (i = 0; i < numsnps; i++) {
 82 |     cupt = snpmarkers[i];
 83 | 
 84 |     for (j = 0; j < numind2; ++j) {
 85 |       t = x1[j];
 86 |       g1[j] = getgtypes (cupt, t);
 87 |       t = x2[j];
 88 |       g2[j] = getgtypes (cupt, t);
 89 |       tt = -1;
 90 |       if ((g1[j] >= 0) && (g2[j] >= 0)) {  
 91 | 	tt = g1[j] + g2[j];
 92 |         if (pseudodip) tt /= 2 ; 
 93 |       }
 94 |       putgtypes (cupt, j, tt);
 95 |     }
 96 |   }
 97 | 
 98 |   free (g1);
 99 |   free (g2);
100 |   free (x1);
101 |   free (x2);
102 | 
103 | }
104 | 
105 | int mkindd2h (Indiv ** indivmarkers, Indiv *** pindm2, int numindivs)
106 | {
107 |   char ss[50], s1[50], s2[50] ;
108 |   Indiv *indx, **indm2, *indp, *ind1, *ind2 ;
109 |   int n, len, k;
110 |   int numind2;
111 | 
112 |   numind2 = numindivs * 2;
113 |   ZALLOC (*pindm2, numind2, Indiv *);
114 |   indm2 = *pindm2;
115 |   n = 0;
116 |   for (k = 0; k < numindivs; k++) {
117 |     indx = indivmarkers[k];
118 |     strcpy (s1, indx->ID);
119 |     strcat (s1, ":A") ; 
120 |     strcpy (s2, indx->ID);
121 |     strcat (s2, ":B") ; 
122 |     if (strlen(s1) >=IDSIZE) fatalx("d2h overflow: %s\n", indx -> ID) ;
123 |     ZALLOC (indm2[n], 1, Indiv);
124 |     indp = indm2[n];
125 |     *indp = *indx;
126 |     strcpy (indp->ID, s1);
127 |     ++n;
128 |     ZALLOC (indm2[n], 1, Indiv);
129 |     indp = indm2[n];
130 |     *indp = *indx;
131 |     strcpy (indp->ID, s2);
132 |     ++n;
133 |   }
134 |   if (n != numind2) fatalx ("(mkindh2d) bug\n");
135 |   return n;
136 | }
137 | 
138 | void
139 | remapd2h (SNP ** snpmarkers, int numsnps, Indiv ** indivmarkers,
140 | 	  Indiv ** indm2, int numindivs, int numind2)
141 | {
142 | 
143 |   int g1, g2, gg;
144 |   int *xx, *x2;
145 |   int *tind, tt, t, i, j, k, j1, j2, x;
146 |   Indiv *indx;
147 |   SNP *cupt;
148 |   char s1[50], s2[50];
149 |   long rlen2, packlen2 ; 
150 |   char *pbuff ;
151 | 
152 |   rlen2 = rlen  * 2 ; 
153 |   packlen2 = packlen  * 2 ; 
154 | 
155 |   ZALLOC(pack2, packlen2, char) ;  
156 |   pbuff = pack2 ; 
157 | 
158 |   ZALLOC (xx, numind2, int);
159 |   ZALLOC (x2, numindivs, int);
160 | 
161 |   x = 0 ;
162 |   for (i=0 ; i < numsnps; ++i) { 
163 |    cupt = snpmarkers[i] ; 
164 |    ivclear(xx, -1, numind2) ; 
165 |    for (k=0 ; k < numindivs; ++k) { 
166 |     gg = getgtypes(cupt, k) ; 
167 |     if (gg<0) gg = 3 ; 
168 |     g1 = g2 = gg ; 
169 |     if (gg == 1) {  
170 |      x = ranmod(2) ; 
171 |      g1 = 2*x ; 
172 |      g2 = 2*(1-x) ; 
173 |     }
174 |     xx[2*k] = g1 ; 
175 |     xx[2*k+1] = g2 ; 
176 |    }
177 |    for (k=0; k<numind2; ++k) { 
178 |     gg = xx[k] ; 
179 |     wbuff( (unsigned char *) pbuff, k, gg) ; 
180 |    }
181 |    cupt -> pbuff = pbuff ; 
182 |    pbuff += rlen2 ; 
183 |   }
184 | 
185 |   free (xx);
186 |   free (x2);
187 |   free (packgenos) ; 
188 |   packgenos = pack2 ; 
189 |   rlen = rlen2 ; 
190 |   packlen = packlen2 ;
191 | 
192 | }
193 | 
194 | 


--------------------------------------------------------------------------------
/src/ksrc/kjg_fpca.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * kjg_fpca.c
  3 |  *
  4 |  *  Created on: Apr 28, 2014
  5 |  *      Author: Kevin
  6 |  */
  7 | 
  8 | #include <stdio.h>
  9 | 
 10 | #include <gsl/gsl_matrix.h>
 11 | #include <gsl/gsl_vector.h>
 12 | #include <gsl/gsl_rng.h>
 13 | #include <gsl/gsl_blas.h>
 14 | 
 15 | #include "kjg_fpca.h"
 16 | #include "kjg_gsl.h"
 17 | 
 18 | #include "admutils.h"
 19 | #include "../gval.h"
 20 | 
 21 | size_t KJG_FPCA_ROWS = 256;
 22 | 
 23 | void
 24 | kjg_fpca (size_t K, size_t L, size_t I, double *eval, double *evec)
 25 | {
 26 |   if (K >= L)
 27 |     exit (1);
 28 |   if (I == 0)
 29 |     exit (1);
 30 | 
 31 |   size_t m = get_ncols ();
 32 |   size_t n = get_nrows ();
 33 | 
 34 |   // PART A - compute Q such that X ~ Q * (Q^T) * X
 35 |   gsl_matrix *G1 = gsl_matrix_alloc (n, L);
 36 |   gsl_matrix *G2 = gsl_matrix_alloc (n, L);
 37 |   gsl_matrix *Q = gsl_matrix_alloc (m, (I + 1) * L);
 38 |   gsl_matrix *Gswap;
 39 | 
 40 |   gsl_rng *r = kjg_gsl_rng_init ();
 41 |   kjg_gsl_ran_ugaussian_matrix (r, G1);
 42 |   gsl_rng_free (r);
 43 | 
 44 |   size_t i;
 45 |   for (i = 0; i < I; i++)
 46 |     {
 47 |       gsl_matrix_view Qi = gsl_matrix_submatrix (Q, 0, i * L, m, L);
 48 | 
 49 |       // do the multiplication
 50 |       kjg_fpca_XTXA (G1, &Qi.matrix, G2);
 51 | 
 52 |       // scale to prevent G2 from blowing up
 53 |       gsl_matrix_scale (G2, 1.0 / m);
 54 | 
 55 |       Gswap = G2;
 56 |       G2 = G1;
 57 |       G1 = Gswap;
 58 |     }
 59 | 
 60 |   gsl_matrix_view Qi = gsl_matrix_submatrix (Q, 0, I * L, m, L);
 61 |   kjg_fpca_XA (G1, &Qi.matrix);
 62 | 
 63 |   {
 64 |     gsl_matrix *V = gsl_matrix_alloc (Q->size2, Q->size2);
 65 |     gsl_vector *S = gsl_vector_alloc (Q->size2);
 66 | 
 67 |     kjg_gsl_SVD (Q, V, S);
 68 | 
 69 |     gsl_matrix_free (V);
 70 |     gsl_vector_free (S);
 71 |   }
 72 | 
 73 |   // kjg_gsl_matrix_QR(Q); // QR decomposition is less accurate than SVD
 74 | 
 75 |   gsl_matrix_free (G1);
 76 |   gsl_matrix_free (G2);
 77 | 
 78 |   // PART B - compute B matrix, take SVD and return
 79 |   gsl_matrix *B = gsl_matrix_alloc (n, (I + 1) * L);
 80 |   kjg_fpca_XTB (Q, B);
 81 | 
 82 |   gsl_matrix *Utilda = gsl_matrix_alloc ((I + 1) * L, (I + 1) * L);
 83 |   gsl_vector *Stilda = gsl_vector_alloc ((I + 1) * L);
 84 | 
 85 |   kjg_gsl_SVD (B, Utilda, Stilda);
 86 | 
 87 |   gsl_matrix_view Vk = gsl_matrix_submatrix (B, 0, 0, n, K);
 88 |   gsl_matrix_view evec_view = gsl_matrix_view_array (evec, n, K);
 89 |   gsl_matrix_memcpy (&evec_view.matrix, &Vk.matrix);
 90 | 
 91 |   gsl_vector_view Sk = gsl_vector_subvector (Stilda, 0, K);
 92 |   gsl_vector_view eval_view = gsl_vector_view_array (eval, K);
 93 |   gsl_vector_mul (&Sk.vector, &Sk.vector);
 94 |   gsl_vector_scale (&Sk.vector, 1.0 / m);
 95 |   gsl_vector_memcpy (&eval_view.vector, &Sk.vector);
 96 | 
 97 |   gsl_matrix_free (Q);
 98 |   gsl_matrix_free (B);
 99 |   gsl_matrix_free (Utilda);
100 |   gsl_vector_free (Stilda);
101 | }
102 | 
103 | void
104 | kjg_fpca_XTXA (const gsl_matrix * A1, gsl_matrix * B, gsl_matrix * A2)
105 | {
106 |   size_t m = get_ncols ();
107 |   size_t n = get_nrows ();
108 | 
109 |   size_t i, r;			// row index
110 |   double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS);	// normalized
111 | 
112 |   gsl_matrix_view Bi, Xi;
113 | 
114 |   gsl_matrix_set_zero (A2);
115 | 
116 |   for (i = 0; i < m; i += KJG_FPCA_ROWS)
117 |     {
118 |       r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y);
119 |       Xi = gsl_matrix_view_array (Y, r, n);
120 |       Bi = gsl_matrix_submatrix (B, i, 0, r, B->size2);
121 |       gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1, &Xi.matrix, A1, 0,
122 | 		      &Bi.matrix);
123 |       gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1, &Xi.matrix, &Bi.matrix,
124 | 		      1, A2);
125 |     }
126 | 
127 |   free (Y);
128 | }
129 | 
130 | void
131 | kjg_fpca_XA (const gsl_matrix * A, gsl_matrix * B)
132 | {
133 |   size_t n = get_nrows ();
134 |   size_t m = get_ncols ();
135 | 
136 |   size_t i, r;
137 |   double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS);
138 | 
139 |   gsl_matrix_view Hmat, Xmat;
140 | 
141 |   gsl_matrix_set_zero (B);
142 | 
143 |   for (i = 0; i < m; i += KJG_FPCA_ROWS)
144 |     {
145 |       r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y);
146 |       Xmat = gsl_matrix_view_array (Y, r, n);
147 |       Hmat = gsl_matrix_submatrix (B, i, 0, r, B->size2);
148 |       gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1, &Xmat.matrix, A, 0,
149 | 		      &Hmat.matrix);
150 |     }
151 | 
152 |   free (Y);
153 | }
154 | 
155 | void
156 | kjg_fpca_XTB (const gsl_matrix * B, gsl_matrix * A)
157 | {
158 |   size_t n = get_nrows ();
159 |   size_t m = get_ncols ();
160 | 
161 |   size_t i, r;
162 |   double *Y = malloc (sizeof (double) * n * KJG_FPCA_ROWS);
163 |   gsl_matrix_view Xmat;
164 | 
165 |   gsl_matrix_set_zero (A);
166 | 
167 |   for (i = 0; i < m; i += KJG_FPCA_ROWS)
168 |     {
169 |       r = kjg_geno_get_normalized_rows (i, KJG_FPCA_ROWS, Y);
170 |       Xmat = gsl_matrix_view_array (Y, r, n);
171 |       gsl_matrix_const_view Hmat = gsl_matrix_const_submatrix (B, i, 0, r,
172 | 							       B->size2);
173 |       gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1, &Xmat.matrix, &Hmat.matrix,
174 | 		      1, A);
175 |     }
176 | 
177 |   free (Y);
178 | }
179 | 


--------------------------------------------------------------------------------
/src/ksrc/kjg_gsl.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <math.h>
  3 | 
  4 | #include <gsl/gsl_matrix.h>
  5 | #include <gsl/gsl_vector.h>
  6 | #include <gsl/gsl_rng.h>
  7 | #include <gsl/gsl_randist.h>
  8 | #include <gsl/gsl_blas.h>
  9 | 
 10 | #include <lapacke.h>
 11 | #include "kjg_gsl.h"
 12 | 
 13 | void
 14 | kjg_gsl_matrix_fprintf (FILE * stream, gsl_matrix * m, const char *template)
 15 | {
 16 |   size_t i, j;
 17 |   for (i = 0; i < m->size1; i++)
 18 |     {
 19 |       fprintf (stream, template, gsl_matrix_get (m, i, 0));
 20 |       for (j = 1; j < m->size2; j++)
 21 | 	{
 22 | 	  fprintf (stream, "\t");
 23 | 	  fprintf (stream, template, gsl_matrix_get (m, i, j));
 24 | 	}
 25 |       fprintf (stream, "\n");
 26 |     }
 27 | }
 28 | 
 29 | void
 30 | kjg_gsl_matrix_fscanf (FILE * stream, gsl_matrix * m)
 31 | {
 32 |   size_t i, j;
 33 |   double x;
 34 |   for (i = 0; i < m->size1; i++)
 35 |     {
 36 |       for (j = 0; j < m->size2; j++)
 37 | 	{
 38 | 	  fscanf (stream, "%lg", &x);
 39 | 	  gsl_matrix_set (m, i, j, x);
 40 | 	}
 41 |     }
 42 | }
 43 | 
 44 | void
 45 | kjg_gsl_evec_fprintf (FILE * stream,
 46 | 		      gsl_vector * eval,
 47 | 		      gsl_matrix * evec, const char *template)
 48 | {
 49 |   size_t i, j;
 50 |   fprintf (stream, "#");
 51 |   fprintf (stream, template, gsl_vector_get (eval, 0));
 52 |   for (i = 1; i < eval->size; i++)
 53 |     {
 54 |       fprintf (stream, "\t");
 55 |       fprintf (stream, template, gsl_vector_get (eval, i));
 56 |     }
 57 |   fprintf (stream, "\n");
 58 |   kjg_gsl_matrix_fprintf (stream, evec, template);
 59 | }
 60 | 
 61 | int
 62 | kjg_gsl_evec_fscanf (FILE * stream, gsl_vector * eval, gsl_matrix * evec)
 63 | {
 64 |   size_t i, j;
 65 |   int r;
 66 |   double x;
 67 | 
 68 |   r = fscanf (stream, "#%lg", &x);
 69 |   if (r != 1)
 70 |     return (r);
 71 |   gsl_vector_set (eval, 0, x);
 72 | 
 73 |   for (i = 1; i < eval->size; i++)
 74 |     {
 75 |       r = fscanf (stream, "%lg", &x);
 76 |       if (r != 1)
 77 | 	return (r);
 78 |       gsl_vector_set (eval, i, x);
 79 |     }
 80 | 
 81 |   for (i = 0; i < evec->size1; i++)
 82 |     {
 83 |       for (j = 0; j < evec->size2; j++)
 84 | 	{
 85 | 	  r = fscanf (stream, "%lg", &x);
 86 | 	  if (r != 1)
 87 | 	    return (r);
 88 | 	  gsl_matrix_set (evec, i, j, x);
 89 | 	}
 90 |     }
 91 | 
 92 |   return (0);
 93 | }
 94 | 
 95 | gsl_rng *
 96 | kjg_gsl_rng_init ()
 97 | {
 98 |   const gsl_rng_type *T;
 99 |   gsl_rng *r;
100 |   extern long seed;
101 | 
102 |   gsl_rng_env_setup ();
103 | 
104 |   gsl_rng_default_seed = seed;
105 | 
106 |   T = gsl_rng_default;
107 |   r = gsl_rng_alloc (T);
108 | 
109 | //  fprintf (stderr, "generator type: %s\n", gsl_rng_name (r));
110 | //  fprintf (stderr, "seed = %lu\n", gsl_rng_default_seed);
111 | 
112 |   return (r);
113 | }
114 | 
115 | int
116 | kjg_gsl_matrix_frobenius_normalize (gsl_matrix * m)
117 | {
118 |   double s = kjg_gsl_dlange ('F', m);
119 |   double d = m->size1 * m->size2;
120 |   return (gsl_matrix_scale (m, d / s));
121 | }
122 | 
123 | double
124 | kjg_gsl_dlange (const char norm, const gsl_matrix * m)
125 | {
126 |   return (LAPACKE_dlange (LAPACK_ROW_MAJOR, norm, m->size1, m->size2, m->data,
127 | 			  m->tda));
128 | }
129 | 
130 | int
131 | kjg_gsl_dgeqrf (gsl_matrix * m, gsl_vector * tau)
132 | {
133 |   return (LAPACKE_dgeqrf (LAPACK_ROW_MAJOR, m->size1, m->size2, m->data,
134 | 			  m->tda, tau->data));
135 | }
136 | 
137 | int
138 | kjg_gsl_dorgqr (gsl_matrix * m, gsl_vector * tau)
139 | {
140 |   return (LAPACKE_dorgqr (LAPACK_ROW_MAJOR, m->size2, m->size2, m->size2,
141 | 			  m->data, m->tda, tau->data));
142 | }
143 | 
144 | void
145 | kjg_gsl_ran_ugaussian_pair (const gsl_rng * r, double x[2])
146 | {
147 |   double r2;
148 | 
149 |   do
150 |     {
151 |       /* choose x,y in uniform square (-1,-1) to (+1,+1) */
152 |       x[0] = -1 + 2 * gsl_rng_uniform_pos (r);
153 |       x[1] = -1 + 2 * gsl_rng_uniform_pos (r);
154 | 
155 |       /* see if it is in the unit circle */
156 |       r2 = x[0] * x[0] + x[1] * x[1];
157 |     }
158 |   while (r2 > 1.0 || r2 == 0);
159 | 
160 |   r2 = sqrt (-2.0 * log (r2) / r2);
161 | 
162 |   x[0] *= r2;
163 |   x[1] *= r2;
164 | }
165 | 
166 | void
167 | kjg_gsl_ran_ugaussian_matrix (const gsl_rng * r, gsl_matrix * m)
168 | {
169 |   size_t i, j;
170 |   double *data;
171 |   double x, y, r2;
172 | 
173 |   for (i = 0; i < m->size1; i++)
174 |     {
175 |       data = gsl_matrix_ptr (m, i, 0);
176 | 
177 |       for (j = 0; j < m->size2 - 1; j += 2)
178 | 	{
179 | 	  kjg_gsl_ran_ugaussian_pair (r, data);
180 | 	  data += 2;
181 | 	}
182 | 
183 |       if (m->size2 % 2)
184 | 	*data = gsl_rng_uniform_pos (r);
185 |     }
186 | }
187 | 
188 | void
189 | kjg_gsl_matrix_QR (gsl_matrix * m)
190 | {
191 |   gsl_vector *tau = gsl_vector_alloc (m->size2);
192 |   kjg_gsl_dgeqrf (m, tau);
193 |   kjg_gsl_dorgqr (m, tau);
194 |   gsl_vector_free (tau);
195 | }
196 | 
197 | int
198 | kjg_gsl_SVD (gsl_matrix * M, gsl_matrix * V, gsl_vector * S)
199 | {
200 |   size_t big_enough = M->size1 + V->size2;
201 |   double *superb = malloc (big_enough * sizeof (double));
202 |   double *U;
203 |   int info = LAPACKE_dgesvd (LAPACK_ROW_MAJOR,	// row major
204 | 			     'O', 'S', M->size1, M->size2, M->data, M->tda,
205 | 			     S->data, U,
206 | 			     big_enough, V->data, V->tda, superb);
207 |   free (superb);
208 |   return (info);
209 | }
210 | 


--------------------------------------------------------------------------------
/bin/smartpca.perl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl
  2 | 
  3 | use Getopt::Std ;
  4 | use File::Basename ;
  5 | 
  6 | ### process flags
  7 | # -w poplist is compute eigenvectors using populations in poplist, then project
  8 | # -y poplistplot is use populations in poplistplot for plot
  9 | # -z badsnpfile is use badsnpname: badsnpfile in call to smartpca
 10 | my @flaglist = ("i","a","b","k","o","p","e","l","m","q","t","s","w","y","z");
 11 | $x = @ARGV;
 12 | for($n=0; $n<$x; $n++)
 13 | {
 14 |   foreach $flag (@flaglist) 
 15 |   {
 16 |     if($ARGV[$n] eq "-$flag") { $specified{$flag} = 1; }
 17 |   }
 18 | }
 19 | foreach $flag ("i","a","b","o","p","e","l")
 20 | {
 21 |   unless($specified{$flag}) { die("OOPS -$flag flag not specified"); }
 22 | }
 23 | getopts('i:a:b:k:o:p:e:l:m:t:s:w:y:z:q:',\%opts);
 24 | $i = $opts{"i"}; 
 25 | $a = $opts{"a"}; 
 26 | $b = $opts{"b"}; 
 27 | $k = 10; if($specified{"k"}) { $k = $opts{"k"}; }
 28 | $o = $opts{"o"}; 
 29 | $q = 0; if($specified{"q"}) { $q = $opts{"q"}; }
 30 | $p = $opts{"p"}; 
 31 | $e = $opts{"e"}; 
 32 | $l = $opts{"l"}; 
 33 | $m = 5; if($specified{"m"}) { $m = $opts{"m"}; }
 34 | $t = 10; if($specified{"t"}) { $t = $opts{"t"}; }
 35 | $s = 6.0; if($specified{"s"}) { $s = $opts{"s"}; }
 36 | if($specified{"w"}) { $w = $opts{"w"}; }
 37 | if($specified{"y"}) { $y = $opts{"y"}; }
 38 | if($specified{"z"}) { $z = $opts{"z"}; }
 39 | 
 40 | ### run smartpca
 41 | $parfile = "$o.par"; 
 42 | $evec = "$o.evec";
 43 | open(PARFILE,">$parfile") || die("OOPS couldn't open file $parfile for writing");
 44 | print PARFILE ("genotypename: $i\n");
 45 | print PARFILE ("snpname: $a\n");
 46 | print PARFILE ("indivname: $b\n");
 47 | print PARFILE ("evecoutname: $evec\n");
 48 | print PARFILE ("evaloutname: $e\n");
 49 | print PARFILE ("altnormstyle: NO\n");
 50 | print PARFILE ("numoutevec: $k\n");
 51 | print PARFILE ("numoutlieriter: $m\n");
 52 | print PARFILE ("numoutlierevec: $t\n");
 53 | print PARFILE ("outliersigmathresh: $s\n");
 54 | print PARFILE ("qtmode: $q\n");
 55 | if($specified{"w"}) { print PARFILE ("poplistname: $w\n"); }
 56 | if($specified{"z"}) { print PARFILE ("badsnpname: $z\n"); }
 57 | close(PARFILE);
 58 | $command = "smartpca";          # MUST put bin directory in path
 59 | $command .= " -p $parfile >$l";
 60 | print("$command\n");
 61 | system("$command");
 62 | 
 63 | ### make string of populations for ploteig
 64 | $popstring = "";
 65 | open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading");
 66 | while($line = <EVEC>)
 67 | {
 68 |   chomp($line);
 69 |   my @array = split(/[\t ]+/,$line);
 70 |   $x = @array;
 71 |   if($array[1] =~ /eigvals/) { next; } # eigvals header line
 72 |   $pop = $array[$x-1];
 73 |   if($popfound{$pop}) { next; }
 74 |   $popstring = $popstring . "$pop:";
 75 |   $popfound{$pop} = 1;
 76 | }
 77 | close(EVEC);
 78 | chop($popstring); # remove last ":"
 79 | 
 80 | if($specified{"y"}) 
 81 | {
 82 |   ### make string of populations for ploteig based on -y flag input
 83 |   $popstring = "";
 84 |   open(Y,$y) || die("COF");
 85 |   while($line = <Y>)
 86 |   {
 87 |     chomp($line);
 88 |     $popstring .= "$line:";
 89 |   }
 90 |   chop($popstring);
 91 | }
 92 | 
 93 | ### cax ploteig
 94 | $command = "ploteig";           # MUST put bin directory in path
 95 | $command .= " -i $evec";
 96 | $command .= " -c 1:2 ";
 97 | $command .= " -p $popstring ";
 98 | $command .= " -x ";
 99 | $command .= " -y ";
100 | $command .= " -o $p.xtxt "; # must end in .xtxt
101 | print("$command\n");
102 | system("$command");
103 | 
104 | ### translate .evec file to .pca file expected by eigenstrat program
105 | ### Note: .evec file does not contain entries for outliers 
106 | ###       .pca  file does contain entries (set to all 0.0) for outliers
107 | 
108 | # ----- If this looks like a PLINK run, call the PLINK kludge
109 | if ( $i =~ m/\.ped$/ || $i =~ m/\.PED/ )  {
110 |   $command = "evec2pca-ped.perl $k $evec $b $o";
111 | }
112 | else  {
113 |   $command = "evec2pca.perl $k $evec $b $o";
114 | }
115 | print("$command\n");
116 | system("$command");
117 | 
118 | ### If labels are Case and Control only, compute correlations between
119 | ### Case/Control status and each eigenvector.  Append to logfile.
120 | if(($popstring eq "Case:Control") || ($popstring eq "Control:Case"))
121 | {
122 |   open(LOG,">>$l") || die("OOPS couldn't open file $l for appending");
123 |   print LOG ("\n");
124 |   for($x=0; $x<$k; $x++) # compute correlation for evec $x
125 |   {
126 |     open(EVEC,$evec) || die("OOPS couldn't open file $evec for reading");
127 |     $sum1=0; $sumx=0; $sumxx=0; $sumy=0; $sumyy=0; $sumxy=0;
128 |     $line = <EVEC>; chomp($line); # eigvals line
129 |     while($line = <EVEC>)
130 |     {
131 |       chomp($line);
132 |       my @array = split(/[\t ]+/,$line);
133 |       $this = $array[2+$x];
134 |       $sumy += $this;
135 |       $sumyy += $this*$this;
136 |       $sum1 += 1;
137 |       if($line =~ /Case/) # Case is 1, Control is 0
138 |       {
139 |         $sumx += 1;
140 |         $sumxx += 1;
141 |         $sumxy += $this;
142 |       }
143 |     }
144 |     close(EVEC);
145 |     $meanx = $sumx/$sum1;
146 |     $meany = $sumy/$sum1;
147 |     if($sum1 == 0) { next; }
148 |     $sdevx = sqrt($sumxx/$sum1 - $meanx*$meanx);
149 |     $sdevy = sqrt($sumyy/$sum1 - $meany*$meany);
150 |     if($sdevx * $sdevy == 0) { next; }
151 |     $corr = ($sumxy/$sum1) / ($sdevx*$sdevy);
152 |     $x1 = $x+1;
153 |     printf LOG ("Correlation between eigenvector $x1 (of $k) and Case/Control status is %.03f\n",$corr);
154 |   }
155 |   close(LOG);
156 | }
157 | 


--------------------------------------------------------------------------------
/src/gval.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <stdlib.h>
  4 | #include <unistd.h>
  5 | #include <math.h>
  6 | #include <sys/types.h>
  7 | #include <sys/stat.h>
  8 | #include <fcntl.h>
  9 | #include <stdint.h>
 10 | #include <inttypes.h>
 11 | 
 12 | #include <nicklib.h>
 13 | 
 14 | #include "admutils.h"
 15 | #include "mcio.h"
 16 | #include "gval.h"
 17 | 
 18 | static SNP **xxsnps = NULL;
 19 | static Indiv **xindivmarkers = NULL;
 20 | static int xnrows, xncols;
 21 | static int xnumindivs;
 22 | static int *xxindex = NULL;
 23 | static double *xmean, *xfancy;
 24 | static double **gtable = NULL;
 25 | 
 26 | int getcolxz (double *xcol, SNP * cupt, int *xindex, int *xtypes,
 27 |               int nrows, int col, double *xmean, double *xfancy, int *n0,
 28 |               int *n1);
 29 | 
 30 | void
 31 | setgval (SNP ** xsnps, int nrows, Indiv ** indivmarkers, int numindivs,
 32 |          int *xindex, int *xtypes, int ncols)
 33 | {
 34 | 
 35 |   double *cc;
 36 |   int t, n0, n1, i, k, col;
 37 |   SNP *cupt;
 38 |   double mean, y;
 39 | 
 40 |   unsetgval ();
 41 | 
 42 |   xxsnps = xsnps;
 43 |   xnrows = nrows;
 44 |   xncols = ncols;
 45 |   xindivmarkers = indivmarkers;
 46 |   xnumindivs = numindivs;
 47 |   xxindex = xindex;
 48 | 
 49 |   for (i = 1; i < nrows; i++) {
 50 |     if (xxindex[i] < xxindex[i - 1]) {
 51 |       fprintf (stderr, "xindex not sorted\n");
 52 |       exit (1);
 53 |     }
 54 |   }
 55 | 
 56 |   ZALLOC (cc, nrows, double);
 57 |   ZALLOC (xmean, ncols, double);
 58 |   ZALLOC (xfancy, ncols, double);
 59 |   vclear (xfancy, 1.0, ncols);
 60 |   gtable = initarray_2Ddouble (ncols, 4, 0);
 61 | 
 62 |   for (i = 0; i < ncols; ++i) {
 63 |     col = i;
 64 |     cupt = xsnps[i];
 65 | 
 66 |       /**
 67 |        if (i>=0) {
 68 |        printf("zz: %d %s\n", cupt -> ID) ;  fflush(stdout) ;
 69 |        }
 70 |        */
 71 |     getcolxz (cc, cupt, xindex, xtypes, nrows, i, xmean, xfancy, &n0, &n1);
 72 | 
 73 |     mean = xmean[col] / xfancy[col];
 74 |     for (k = 0; k < 3; ++k) {
 75 |       y = ((double) k) - mean;
 76 |       y *= xfancy[col];
 77 |       gtable[col][k] = y / sqrt (2.0);
 78 |     }
 79 |     gtable[col][3] = 0;
 80 | 
 81 |     t = MIN (n0, n1);
 82 |     if (t == 0)
 83 |       cupt->ignore = YES;       // side-effect
 84 |   }
 85 | 
 86 |   free (cc);
 87 | }
 88 | 
 89 | void
 90 | unsetgval ()
 91 | {
 92 |   if (xxsnps == NULL)
 93 |     return;
 94 | 
 95 |   xxsnps = NULL;
 96 |   xindivmarkers = NULL;
 97 |   xxindex = NULL;
 98 | 
 99 |   free2D (&gtable, xncols);
100 | 
101 |   gtable = NULL;
102 | 
103 |   free (xmean);
104 |   free (xfancy);
105 | }
106 | 
107 | int
108 | getgval (int row, int col, double *val)
109 | {
110 | 
111 |   /**
112 |    if (row>=xnrows) fatalx("row index overflow\n") ;
113 |    if (col>=xncols) fatalx("col index overflow\n") ;
114 |    */
115 | 
116 |   return getggval (xxindex[row], col, val);
117 | 
118 | }
119 | 
120 | int
121 | getggval (int indindx, int col, double *val)
122 | // indindex is index in full array
123 | {
124 |   SNP *cupt;
125 |   int t, z;
126 |   double y, mean;
127 | 
128 |   if (gtable == NULL) fatalx("(gval) bug\n") ; 
129 |   *val = 0;
130 |   if (xindivmarkers[indindx]->ignore)
131 |     return -1;
132 |   cupt = xxsnps[col];
133 |   t = getgtypes (cupt, indindx);
134 |   if (t < 0)
135 |     return t;
136 | 
137 |   *val = gtable[col][t];
138 |   return t;
139 | 
140 | }
141 | 
142 | // Unpack lookup table
143 | 
144 | // macro to unpack a single byte
145 | #define U(n) { ((n) >> 6) & 3, ((n) >> 4) & 3, ((n) >> 2) & 3, (n) & 3 }
146 | 
147 | // macros to build the u(n)packi(n)g table
148 | #define U1(n)  U(n),  U((n) +  1),  U((n) +  2),  U((n) +  3)
149 | #define U2(n) U1(n), U1((n) +  4), U1((n) +  8), U1((n) + 12)
150 | #define U3(n) U2(n), U2((n) + 16), U2((n) + 32), U2((n) + 48)
151 | 
152 | // the unpacking table
153 | static const uint8_t UL[256][4] = { U3 (0), U3 (64), U3 (128), U3 (192) };
154 | 
155 | size_t
156 | get_nrows ()
157 | {
158 |   return (xnrows);
159 | }
160 | 
161 | size_t
162 | get_ncols ()
163 | {
164 |   return (xncols);
165 | }
166 | 
167 | /**
168 |  * Unpacks a SNP column
169 |  * @param snp_index
170 |  * @param *y arrayref to store data
171 |  */
172 | void
173 | kjg_geno_get_normalized_row (const size_t snp_index, double *y)
174 | {
175 |   uint8_t *packed = xxsnps[snp_index]->pbuff;
176 |   double *norm_lookup = gtable[snp_index];
177 | 
178 |   size_t i = 0, j = xxindex[i];
179 |   while (1) {
180 |     size_t k = j / 4;           // packed location
181 |     size_t jf = (k + 1) * 4;    // last index in packed location
182 | 
183 |     uint8_t p = packed[k];      // packed data
184 |     const uint8_t *u = UL[p];   // unpacked data
185 | 
186 |     while (j < jf) {
187 |       size_t o = j % 4;         // offset in packed data
188 |       size_t t = u[o];          // unpacked data
189 |       y[i] = norm_lookup[t];    // normalized data
190 | 
191 |       if (++i == xnrows)        // move onto next entry
192 |         return;                 // break if we are done with SNP
193 |       j = xxindex[i];           // perform the lookup
194 |     }
195 |   }
196 | }
197 | 
198 | /**
199 |  * Unpacks several SNP coluns
200 |  * @param snp_index index of the SNP
201 |  * @param *unpacked arrayref to store data
202 |  */
203 | 
204 | size_t
205 | kjg_geno_get_normalized_rows (const size_t i, const size_t r, double *Y)
206 | {
207 |   size_t j;
208 |   for (j = i; j < i + r && j < xncols; j++) {
209 |     kjg_geno_get_normalized_row (j, Y);
210 |     Y += xnrows;
211 |   }
212 |   return (j - i);
213 | }
214 | 


--------------------------------------------------------------------------------
/include/statsubs.h:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <unistd.h>
  4 | #include <math.h>  
  5 | 
  6 | #include "strsubs.h" 
  7 | 
  8 | #define	CHI_EPSILON     0.000001    /* accuracy of critchi approximation */
  9 | #define	CHI_MAX     99999.0         /* maximum chi square value */
 10 | 
 11 | #define	LOG_SQRT_PI     0.5723649429247000870717135 /* log (sqrt (pi)) */
 12 | #define	I_SQRT_PI       0.5641895835477562869480795 /* 1 / sqrt (pi) */
 13 | #define	I_PI            0.3183098861837906715377675 /* 1 / pi  */
 14 | #define PI              3.1415926535897932384626434  
 15 | #define	BIGX           20.0         /* max value to represent exp (x) */
 16 | #define	ex(x)             (((x) < -BIGX) ? 0.0 : exp (x))
 17 | #define	SQRT_PI       (1.0/I_SQRT_PI)  /* sqrt (pi) */
 18 | 
 19 | double medchi(int *cls, int len, int *n0, int *n1, double *kstail)  ;
 20 | double ks2(int *cls, int len, int *n0, int *n1, double *kstail)  ;
 21 | double probks(double lam) ;
 22 | 
 23 | double nordis(double z) ;
 24 | double ndens(double val, double mean, double sig) ;
 25 | double lognordis(double z) ;
 26 | double logndens(double val, double mean, double sig) ;
 27 | 
 28 | double ntail(double z) ;
 29 | void tailstats(double *x, double a, int isupper) ; 
 30 | double zprob(double p) ;
 31 | void setzptable() ;
 32 | double z2x2(double *a) ;
 33 | double conchi(double *a, int m, int n)  ;
 34 | double conchiv(double *a, int m, int n)  ;
 35 | double chitest(double *a, double *p, int n) ;
 36 | double pi() ;
 37 | 
 38 | double xlgamma(double x) ;
 39 | double psi(double x) ;
 40 | double tau(double x) ;
 41 | double logbessi0(double x) ;
 42 | double bessi0(double x) ;
 43 | double logbessi1(double x) ;
 44 | double bessi1(double x) ;
 45 | void bernload() ;
 46 | double bernum(int x) ;
 47 | 
 48 | void gpars(double *p, double *lam, double mean, double var) ;
 49 | void mlegamma(double *a, int n, double *p, double *lam) ;
 50 | void mleg(double a1, double a2, double *p, double *lam) ;
 51 | 
 52 | double dilog(double x) ;
 53 | double li2(double x) ;
 54 | 
 55 | double hwstat(double *x) ;
 56 | 
 57 | double gammprob(double x, double p, double lam)  ;
 58 | double bprob(double p, double a, double b)  ;
 59 | double lbeta(double a, double b)  ;
 60 | double poissloglike(int kk, double mean) ;
 61 | double poissexp(int kk, double mean) ;
 62 | double dirmult(double *pp, int *aa, int len) ;
 63 | double dawson(double t) ;
 64 | 
 65 | double binomtail(int n, int t, double p, char c) ;
 66 | double binlogtail(int n, int t, double p, char c) ;
 67 | void genbin(double *a, int n, double p) ;
 68 | void genlogbin(double *a, int n, double p) ;
 69 | int ifirstgt(int val, int *tab, int n)  ;
 70 | int firstgt(double val, double *tab, int n)  ;
 71 | 
 72 | void cinterp(double val, double x0, double x1, 
 73 |   double f0, double f0p, double f1, double f1p, double *fv, double *fvp)  ;
 74 | int firstgtx(double val, double *tab, int n) ;
 75 | int jfirstgtx(int val, int *tab, int n) ;
 76 | 
 77 | double rtlchsq(int df, double z) ;
 78 | double critchi(int df, double z) ;
 79 | double rtlf(int df1, int df2, double f) ;
 80 | 
 81 | double ltlg(double a, double x)  ;
 82 | double rtlg(double a, double x)  ;
 83 | 
 84 | double twdens(double twstat)  ;
 85 | double twtail(double twstat)  ;
 86 | double twtailx(double twstat)  ;
 87 | double twdensx(double twstat)  ;
 88 | double twnorm(double lam, double p, double n)  ; 
 89 | void twfree() ;
 90 | int settwxtable(char *table) ;
 91 | void gettw(double x,  double *tailp, double *densp)   ;
 92 | double dotwcalc(double *lambda, int m, double *ptw, double *pzn, double *pzvar, int minm) ;
 93 | int numgtz(double *a, int n) ;
 94 | 
 95 | double betaix(double a, double b, double lo, double hi)  ;
 96 | double betai(double a, double b, double x) ;
 97 | void bpars(double *a, double *b, double mean, double var)  ;
 98 | void bmoments(double a, double b, double *mean, double *var)  ;
 99 | double unbiasedest(int *ndx, int ndsize, int **counts) ;
100 | void weightjack(double *est, double *sig, double mean, double *jmean, double *jwt, int g)  ;
101 | int modehprob(int n, int a, int m) ;
102 | void calcfc(double *c, int n, double rho) ;
103 | void circconv(double *xout, double *xa, double *xb, int n) ;
104 | 
105 | double bino(int a, int b) ;
106 | void setbino(int maxbco) ;
107 | void destroy_bino() ; 
108 | double exx(double x) ; 
109 | double ubias(int a, int n, int k) ;  
110 | double scx(double *W, double *mean, double *x, int d)  ; // maybe should be in vsubs  
111 | void dither(double *xout, double *xin, int n)  ;
112 | void probit(double *xout, double *xin, int n)  ;
113 | // Berk-Jones 
114 | double bjugauss(double *p, double *u, double *a, int n)  ; // return M_n  (Berk-Jones) 
115 | void bjasympt(double *ptail, double *mtail, double *tail, double mplus, double mminus, int n)  ; 
116 | void bj2(double *aa, double *bb, int a, int b,  double *plpv, double *prpv, double *ppv)  ;
117 | double genhp(double **hp, int a, int b) ; 
118 | double genhpt(int a, int b, int *lt, int *rt)  ; 
119 | void gentail(double **ltail, double **rtail, double **hp, int a, int b)  ;
120 | void setthresh(int *thresh, double **tail, int a, int b, double stat, int mode)  ; 
121 | void bj2x(int *type, int a, int b, double *plpv, double *prpv, double *ppv)  ; 
122 | 
123 | void mlebeta(double *a, int n, double *p1, double *p2);
124 | void estbpars(double *a, int n, double *p1, double *p2);
125 | void mleb(double *p1, double *p2, double u, double v) ;
126 | 
127 | int  loadmptable(double ***mptable) ; 
128 | double wynn(double *v, int n, double *acc, int *nacc) ;
129 | double *vwynn(double **vv, int n, int dim, double **acc, int *nacc) ;
130 | 
131 | double rad2deg(double rad) ; 
132 | double deg2rad(double deg) ; 
133 | 
134 | double quartile(double *x, int n, double q)  ;
135 | int qinterp(double *a, double *b, int n, double val, double *ans)  ;
136 | double truncexpmean(double m, double thresh, int isupper) ;
137 | 
138 | void jitter(double *xout, double *xin, int n)   ; 
139 | void mannwhit(double *a, int na, double *b, int nb, double *pu, double *pv, double *ppv) ;
140 | 
141 | 


--------------------------------------------------------------------------------
/src/twsubs.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | 
  3 | #include <limits.h>
  4 | #include <math.h>
  5 | #include <nicklib.h>
  6 | #include "admutils.h"
  7 | #include "eigsubs.h"
  8 | 
  9 | /* ********************************************************************* */
 10 | 
 11 | extern int verbose;
 12 | 
 13 | int twl2mode = YES;
 14 | int mval = -1;
 15 | int nval = -1;
 16 | int numsamp = 100;
 17 | double mul1 = 1.0;
 18 | 
 19 | 
 20 | double xxlike (int m, double a, double var, double logsum, double lsum);
 21 | double xxlikex (int m, double a, double logsum, double lsum);
 22 | double xxliked (int m, double a, double logsum, double lsum);
 23 | double xxliked2 (int m, double a, double logsum, double lsum);
 24 | double oldtwestxx (double *lam, int m, double *pzn, double *pzvar);
 25 | double doeig2 (double *vals, int m, double *pzn, double *ptw);
 26 | 
 27 | double
 28 | twestxx (double *lam, int m, double *pzn, double *pzvar)
 29 | {
 30 |   double tw, y;
 31 | 
 32 |   if (twl2mode == NO)
 33 |     return oldtwestxx (lam, m, pzn, pzvar);
 34 |   (void) doeig2 (lam, m, pzn, &tw);
 35 | 
 36 |   y = (*pzn) * (double) m;
 37 |   *pzvar = asum (lam, m) / y;
 38 |   return tw;
 39 | 
 40 | }
 41 | 
 42 | double
 43 | oldtwestxx (double *lam, int m, double *pzn, double *pzvar)
 44 | {
 45 |   double lsum, logsum;
 46 |   double *ww;
 47 |   double a, p, yn, var;
 48 |   double ylike, ybase, y, ylmax, ynmax, yld, yld2, ainc, ym;
 49 |   int k;
 50 | 
 51 | 
 52 |   ZALLOC (ww, m, double);
 53 |   copyarr (lam, ww, m);
 54 |   lsum = asum (ww, m);
 55 |   vlog (ww, ww, m);
 56 |   logsum = asum (ww, m);
 57 | 
 58 |   ylmax = -1.0e20;
 59 |   yn = (double) m;
 60 |   ybase = xxlikex (m, yn, logsum, lsum);
 61 | 
 62 |   for (k = 1; k <= 100; ++k) {
 63 |     a = yn / 2.0;
 64 |     ylike = xxlikex (m, a, logsum, lsum);
 65 |     yld = xxliked (m, a, logsum, lsum);
 66 |     ylike -= ybase;
 67 |     if (verbose)
 68 |       printf ("ynloop %12.3f %12.3f %12.3f\n", yn / (double) m, ylike, yld);
 69 |     if (ylike < ylmax)
 70 |       break;
 71 |     ylmax = ylike;
 72 |     ynmax = yn;
 73 |     yn *= 1.1;
 74 |   }
 75 |   a = ynmax / 2.0;
 76 |   for (k = 1; k <= 10; ++k) {
 77 | // newton iteration
 78 |     ylike = xxlikex (m, a, logsum, lsum);
 79 |     yld = xxliked (m, a, logsum, lsum);
 80 |     yld2 = xxliked2 (m, a, logsum, lsum);
 81 |     ylike -= ybase;
 82 |     ainc = -yld / yld2;
 83 |     a += ainc;
 84 |     if (verbose)
 85 |       printf ("newton: %3d  %15.9f  %15.9f  %15.9f\n", k, ylike, yld, ainc);
 86 |   }
 87 |   fflush (stdout);
 88 |   yn = 2.0 * a;
 89 |   ym = (double) m;
 90 |   var = lsum / (2.0 * a * ym);
 91 | 
 92 |   *pzn = yn;
 93 |   *pzvar = var;
 94 | 
 95 |   free (ww);
 96 |   return 0;
 97 | }
 98 | 
 99 | double
100 | xxlike (int m, double a, double var, double logsum, double lsum)
101 | {
102 |   double p, yl = 0.0;
103 |   double ym, x;
104 |   int j;
105 | 
106 |   p = 0.5 * (double) (m + 1);
107 |   ym = (double) m;
108 | 
109 |   yl = -ym * a * log (2.0);
110 |   for (j = 1; j <= m; ++j) {
111 |     x = a - 0.5 * (double) (m - j);
112 |     yl -= lgamma (x);
113 |   }
114 | // so far this is log (C_L)    normalizing constant
115 |   yl -= ym * a * log (var);
116 |   yl += (a - p) * logsum;
117 |   yl -= lsum / (2.0 * var);
118 | 
119 |   return yl;
120 | 
121 | }
122 | 
123 | double
124 | xxlikex (int m, double a, double logsum, double lsum)
125 | {
126 |   double p, yl = 0.0;
127 |   double ym, x, var, lco;
128 |   int j;
129 | 
130 |   p = 0.5 * (double) (m + 1);
131 |   ym = (double) m;
132 |   lco = lsum / (2.0 * ym);
133 |   var = lco / a;
134 | 
135 | 
136 |   yl = -ym * a * log (2.0);
137 |   for (j = 1; j <= m; ++j) {
138 |     x = a - 0.5 * (double) (m - j);
139 |     yl -= lgamma (x);
140 |   }
141 | // so far this is log (C_L)    normalizing constant
142 |   yl -= ym * a * log (var);
143 |   yl += (a - p) * logsum;
144 |   yl -= ym * a;                 // plugging in var 
145 | 
146 |   return yl;
147 | 
148 | }
149 | 
150 | double
151 | xxliked (int m, double a, double logsum, double lsum)
152 | // first deriv wrt a
153 | {
154 |   double p, yl = 0.0;
155 |   double ym, x, var, vard;
156 |   int j;
157 | 
158 |   p = 0.5 * (double) (m + 1);
159 |   ym = (double) m;
160 |   var = lsum / (2.0 * a * ym);
161 |   vard = -var / a;
162 | 
163 | 
164 |   yl = -ym * log (2.0);
165 |   for (j = 1; j <= m; ++j) {
166 |     x = a - 0.5 * (double) (m - j);
167 |     if (x < 0.0)
168 |       return 100.0;
169 |     yl -= psi (x);
170 |   }
171 | // so far this is log (C_L)    normalizing constant
172 |   yl -= ym * log (var);
173 |   yl -= (ym * a / var) * vard;
174 |   yl += logsum;
175 |   yl -= ym;                     // plugging in var 
176 | 
177 |   return yl;
178 | 
179 | }
180 | 
181 | double
182 | xxliked2 (int m, double a, double logsum, double lsum)
183 | // second deriv wrt a
184 | {
185 |   double p, yl = 0.0;
186 |   double ym, x, var, vard, vard2, y;
187 |   int j;
188 | 
189 |   p = 0.5 * (double) (m + 1);
190 |   ym = (double) m;
191 |   var = lsum / (2.0 * a * ym);
192 |   vard = -var / a;
193 |   vard2 = 2.0 * var / (a * a);
194 | 
195 | 
196 |   yl = 0.0;
197 |   for (j = 1; j <= m; ++j) {
198 |     x = a - 0.5 * (double) (m - j);
199 |     if (x < 0.0)
200 |       return 100.0;
201 |     yl -= tau (x);
202 |   }
203 | // so far this is log (C_L)    normalizing constant
204 |   yl -= 2.0 * (ym / var) * vard;
205 |   yl -= (ym * a / var) * vard2;
206 |   y = vard / var;
207 |   yl += (ym * a) * y * y;
208 | 
209 |   return yl;
210 | 
211 | }
212 | 
213 | double
214 | doeig2 (double *vals, int m, double *pzn, double *ptw)
215 | {
216 |   static int ncall = 0;
217 |   double y, tw, tail;
218 |   double zn, zvar, top, bot;
219 |   double *evals;
220 | 
221 |   ++ncall;
222 |   ZALLOC (evals, m, double);
223 |   copyarr (vals, evals, m);
224 |   y = (double) m / asum (evals, m);
225 |   vst (evals, evals, y, m);
226 |   top = (double) (m * (m + 2));
227 |   bot = asum2 (evals, m) - (double) m;
228 |   zn = top / bot;
229 |   y = evals[0] * zn;
230 |   tw = twnorm (y, (double) m, zn);
231 |   tail = twtail (tw);
232 |   free (evals);
233 |   *pzn = zn;
234 |   *ptw = tw;
235 |   return tail;
236 | }
237 | 
238 | double
239 | rhoinv (double x, double gam)
240 | // Lee et al. page 5 for \rho^{-1} 
241 | {
242 |   double y1, y2;
243 | 
244 |   y1 = x + 1.0 - gam;
245 |   y2 = y1 * y1 - 4.0 * x;
246 |   if (y2 <= 0.0)
247 |     return -1.0;
248 | 
249 |   y1 += sqrt (y2);
250 | 
251 |   return 0.5 * y1;
252 | 
253 | }
254 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
  1 | ## shrinkmode added.  3/15
  2 | EIGENSOFT version 8.0.0, 03/30/21 (for Linux only)
  3 | 
  4 | The EIGENSOFT package implements methods from the following 2 papers:
  5 | Patterson et al. 2006 PLoS Genet 2:e190 (population structure)
  6 | Price et al. 2006 Nat Genet 38:904-9 (EIGENSTRAT stratification correction)
  7 | 
  8 | NEW features of EIGENSOFT version 7.2.0
  9 | -- shrinkmode
 10 | 
 11 | NEW features of EIGENSOFT version 6.1.4 include:
 12 | -- pcaselection was omitted from 6.1.3 by accident
 13 | -- Statically linked GSL/openblas
 14 | -- Fixed memory allocation bug in pcaselection
 15 | -- Some routines moved into nicklib
 16 | -- Error message on allocate failure now prints length as "%ld"
 17 |    supporting long values.
 18 | 
 19 | NEW features of EIGENSOFT version 6.1.3 include:
 20 | -- Restored script file extensions to .perl instead of .pl
 21 | -- Added updated ploteig script that disappeared from the repository
 22 | 
 23 | NEW features of EIGENSOFT version 6.1.2 include:
 24 | -- Updated license info to be GPL compliant required by linking the GSL
 25 | 
 26 | NEW features of EIGENSOFT version 6.1.1 include:
 27 | -- Minor bug fix to correctly merge version 6.0.2 and version 6.1 changes.
 28 | -- pcaselection operates on evec files. Added examples. 
 29 | -- Backported twtable.c/h from EIGENSOFT 7alpha
 30 | 
 31 | NEW features of EIGENSOFT version 6.1 include:
 32 | -- The range finding step of PCA fastmode only scales the multiplied matrix,
 33 |    as orthogonalization is unnecessary. This appears to improve accuracy. 
 34 | 
 35 | NEW features of EIGENSOFT version 6.0.2 include:
 36 | -- Fixed Makefile and documentation to build eigenstrat properly
 37 | -- Moved Tracy-Widom table into a header file for easier building
 38 | 
 39 | NEW features of EIGENSOFT version 6.0.1 include:
 40 | -- Minor bug fix which prevents smartpca from trying to print out eigenvalues
 41 |   if fastmode is set.
 42 | 
 43 | NEW features of EIGENSOFT version 6.0.0beta included:
 44 | -- New option fastmode which implements a very fast pca approximation.
 45 |    See POPGEN/README and Galinsky 2014 ASHG talk.
 46 | -- Changes to external packages required.  EIGENSOFT version 5.0.2 required
 47 |    lapack + blas.  On the other hand, EIGENSOFT version 6.0beta requires 
 48 |    GSL + lapack + OpenBLAS (but does not require the native version of blas).
 49 |    The Makefile has been changed accordingly.
 50 | -- EIGENSOFT version 6.0beta supports multi-threading.  See POPGEN/README.
 51 | -- Bug fix for ldregress option.
 52 | 
 53 | See CONVERTF/README for documentation of programs for converting file formats.
 54 | See POPGEN/README for documentation of population structure programs.
 55 | See EIGENSTRAT/README for documentation of EIGENSTRAT programs.
 56 | 
 57 | Questions?
 58 | See https://www.hsph.harvard.edu/alkes-price/eigensoft-frequently-asked-questions/
 59 | https://github.com/DReichLab/EIG
 60 | 
 61 | For questions about building this software: 
 62 | Matthew Mah <matthew_mah@hms.harvard.edu>
 63 | 
 64 | For questions about smartpca:
 65 | Nick Patterson <nickp@broadinstitute.org>
 66 | 
 67 | For questions about eigenstrat:
 68 | Alkes Price <aprice@hsph.harvard.edu>
 69 | 
 70 | Executables and source code:
 71 | ----------------------------
 72 | All C executables are in the bin/ directory.
 73 | 
 74 | We have placed source code for all C executables in the src/ directory, 
 75 | for users who wish to modify and recompile our programs.
 76 | "cd src"
 77 | "make"      
 78 | "make install"
 79 | 
 80 | Note that some of our software will only compile if your system has the
 81 | GSL + lapack + OpenBLAS packages installed.
 82 | 
 83 | On Mac OSX, you can install gsl and OpenBLAS with lapack using homebrew:
 84 | "brew install gsl"
 85 | "brew install openblas"
 86 | 
 87 | If these packages are not in standard directories, you can specify the 
 88 | appropriate include and library directories with the CFLAGS and LDFLAGS 
 89 | make variables. 
 90 | For example, on the Harvard Medical School O2 cluster, the command is:
 91 | 'make CFLAGS="-I/n/app/openblas/0.2.19/include -I/n/app/gsl/2.3/include" LDFLAGS="-L/n/app/openblas/0.2.19/lib -L/n/app/gsl/2.3/lib/"'
 92 | On Mac OSX:
 93 | 'make CFLAGS="-I/usr/local/opt/openblas/include -I/usr/local/opt/gsl/include" LDFLAGS="-L/usr/local/opt/openblas/lib -L/usr/local/opt/gsl/lib"'
 94 | 
 95 | If you have issues with missing lapacke symbols, for example "undefined reference to `LAPACKE_dlange'", run make with the corresponding additional libraries linked:
 96 | 'make LDLIBS="-llapacke"'
 97 | This has been encountered on Linux Mint 18. 
 98 | 
 99 | If you have trouble compiling and running our code, try compiling and
100 | running the pcatoy program in the src directory:
101 | "cd src"
102 | "make pcatoy"
103 | "./pcatoy"
104 | If you are unable to run the pcatoy program successfully, please contact
105 | your system administrator for help, as this is a systems issue which is
106 | beyond our scope.  Your system administrator will be able to troubleshoot
107 | your systems issue using this trivial program.  [You can also try running
108 | the pcatoy program in the bin directory, which we have already compiled.]
109 | 
110 | To remake the entire package:
111 | "cd src"
112 | "make clobber"
113 | "make install"
114 | 
115 | To remake EIG7.2 it is necessary to link to the OpenBLAS library. On orchestra, 
116 | the path is /opt/openblas and should work automatically. On Broad institute machines,
117 | the user should execute "use .openblas-0.2.8" and "use GCC-4.9" at the command 
118 | prompt before attempting to remake. All other users should install OpenBLAS and 
119 | set the variable OPENBLAS to the path at the make command line, 
120 | e.g. "make install OPENBLAS=/usr/local/openblas"
121 | 
122 | ----------------------------
123 | Acknowledgements:
124 | EIGENSOFT was written by Nick Patterson, Alkes Price, Samuela Pollack,
125 | Kevin Galinsky, Chris Chang, and Sasha Gusev.
126 | 
127 | We thank John Novembre and Mike Boursnell for code improvements, Matt Hanna 
128 | for the first implementation of multi-threading, and Angela Yu for a bugfix. 
129 | 
130 | ----------------------------
131 | SOFTWARE COPYRIGHT NOTICE AGREEMENT
132 | This software and its documentation are copyright (2010) by Harvard University 
133 | and The Broad Institute. All rights are reserved. This software is supplied 
134 | without any warranty or guaranteed support whatsoever. Neither Harvard 
135 | University nor The Broad Institute can be responsible for its use, misuse, or 
136 | functionality. The software may be freely copied for non-commercial purposes, 
137 | provided this copyright notice is retained.
138 | 
139 | 


--------------------------------------------------------------------------------
/src/eigensrc/eigsubs.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <limits.h>
  3 | #include <math.h>
  4 | 
  5 | #include <vsubs.h>
  6 | #include <eigsubs.h>
  7 | 
  8 | 
  9 | /* ********************************************************************* */
 10 | void eigx_ (double *pmat, double *ev, int *n);
 11 | void eigxv_ (double *pmat, double *eval, double *evec, int *n);
 12 | void cdc_ (double *pmat, int *n);
 13 | void inverse_ (double *pmat, int *n);
 14 | void solve_ (double *pmat, double *v, int *n);
 15 | void geneigsolve_ (double *pmat, double *qmat, double *eval, int *n);
 16 | 
 17 | void packsym (double *pmat, double *mat, int n);
 18 | 
 19 | 
 20 | void
 21 | eigvals (double *mat, double *evals, int n)
 22 | {
 23 |   double *pmat;
 24 |   int len;
 25 | 
 26 |   len = n * (n + 1);
 27 |   len /= 2;
 28 |   ZALLOC (pmat, 2*len, double);
 29 | 
 30 |   vst (mat, mat, -1.0, n * n);
 31 |   packsym (pmat, mat, n);
 32 |   eigx_ (pmat, evals, &n);
 33 |   free (pmat);
 34 |   vst (mat, mat, -1.0, n * n);
 35 |   vst (evals, evals, -1.0, n);
 36 | }
 37 | 
 38 | void
 39 | eigvecs (double *mat, double *evals, double *evecs, int n)
 40 | {
 41 |   double *pmat;
 42 |   int len;
 43 | 
 44 |   len = n * (n + 1);
 45 |   len /= 2;
 46 |   ZALLOC (pmat, 2*len, double);
 47 | 
 48 |   vst (mat, mat, -1.0, n * n);
 49 |   packsym (pmat, mat, n);
 50 | 
 51 |   eigxv_ (pmat, evals, evecs, &n);
 52 |   free (pmat);
 53 |   vst (mat, mat, -1.0, n * n);
 54 |   vst (evals, evals, -1.0, n);
 55 | }
 56 | 
 57 | /*  note:  dpotrf requires the entire matrix, not packed lower-tri */
 58 | void
 59 | chdecomp (double *mat, int n)
 60 | {
 61 |   /* symetric matrix - don't need to 
 62 |    * convert to column major order */
 63 | 
 64 |   cdc_ (mat, &n);
 65 | }
 66 | 
 67 | void
 68 | inverse (double *mat, int n)
 69 | {
 70 |   int i, j;
 71 | 
 72 |   /* convert to column-major order */
 73 |   for (i = 0; i < n; i++) {
 74 |     for (j = 0; j < i; j++) {
 75 |       double t = mat[n * i + j];
 76 |       mat[n * i + j] = mat[n * j + i];
 77 |       mat[n * j + i] = t;
 78 |     }
 79 |   }
 80 | 
 81 |   /*** DEBUGGING: ***/
 82 |   {
 83 |     FILE *fid = fopen ("eigsubs.dbg", "a");
 84 |     fprintf (fid, "matrix U\n");
 85 |     for (i = 0; i < n; i++) {
 86 |       for (j = 0; j < n; j++) {
 87 | 	fprintf (fid, "%8.4f ", mat[i * n + j]);
 88 |       }
 89 |       fprintf (fid, "\n");
 90 |     }
 91 |   }
 92 |   /*******************/
 93 | 
 94 |   inverse_ (mat, &n);
 95 | 
 96 |   /*** DEBUGGING: ***/
 97 |   {
 98 |     FILE *fid = fopen ("eigsubs.dbg", "a");
 99 |     fprintf (fid, "inverse of matrix U\n");
100 |     for (i = 0; i < n; i++) {
101 |       for (j = 0; j < n; j++) {
102 | 	fprintf (fid, "%8.4f ", mat[i * n + j]);
103 |       }
104 |       fprintf (fid, "\n");
105 |     }
106 |   }
107 |   /*******************/
108 | 
109 |   for (i = 0; i < n; i++) {
110 |     for (j = 0; j < i; j++) {
111 |       double t = mat[n * i + j];
112 |       mat[n * i + j] = mat[n * j + i];
113 |       mat[n * j + i] = t;
114 |     }
115 |   }
116 | }
117 | 
118 | void
119 | solve (double *mat, double *b, double *v, int n)
120 | {
121 |   int i, j;
122 | 
123 |   double *mat2 = (double *) malloc (n * n * sizeof (double));
124 | 
125 |   /* lapack is going to put the lu-decomp into the matrix,
126 |    * so make a copy and convert to column-major order */
127 |   for (i = 0; i < n; i++) {
128 |     for (j = 0; j < n; j++) {
129 |       mat2[n * i + j] = mat[n * j + i];
130 |     }
131 |   }
132 | 
133 |   /* copy b into v */
134 |   for (i = 0; i < n; i++) {
135 |     v[i] = b[i];
136 |   }
137 | 
138 |   solve_ (mat2, v, &n);
139 | 
140 |   free (mat2);
141 |   return;
142 | }
143 | 
144 | void
145 | packsym (double *pmat, double *mat, int n)
146 | 	//  lapack L mode (fortran)
147 | {
148 |   int i, j, k = 0;
149 |   for (i = 0; i < n; i++) {
150 |     for (j = i; j < n; j++) {
151 |       pmat[k] = mat[i * n + j];
152 |       ++k;
153 |     }
154 |   }
155 | }
156 | 
157 | void
158 | geneigsolve (double *pmat, double *qmat, double *evec, double *eval, int n)
159 | {
160 | 
161 |   /* save copy of A and B, which LAPACK will overwrite */
162 |   double *amat = (double *) malloc (n * n * sizeof (double));
163 |   double *bmat = (double *) malloc (n * n * sizeof (double));
164 | 
165 |   int i, j;
166 |   for (i = 0; i < n * n; i++) {
167 |     amat[i] = pmat[i];
168 |     bmat[i] = qmat[i];
169 |   }
170 | 
171 | 
172 | 
173 | 
174 |   {
175 |     FILE *fid = fopen ("eigsubs.dbg", "a");
176 |     fprintf (fid, "matrix A\n");
177 |     for (i = 0; i < n; i++) {
178 |       for (j = 0; j < n; j++) {
179 | 	fprintf (fid, "%8.4f ", amat[i * n + j]);
180 |       }
181 |       fprintf (fid, "\n");
182 |     }
183 | 
184 |     fprintf (fid, "matrix B\n");
185 |     for (i = 0; i < n; i++) {
186 |       for (j = 0; j < n; j++) {
187 | 	fprintf (fid, "%8.4f ", bmat[i * n + j]);
188 |       }
189 |       fprintf (fid, "\n");
190 |     }
191 |     fclose (fid);
192 |   }
193 | 
194 | 
195 |   /* matrices have to be symetric-definite, so don't 
196 |    * need to convert to column-major order */
197 |   geneigsolve_ (pmat, qmat, eval, &n);
198 | 
199 | 
200 |   /* copy eigenvectors to A and original A,B back */
201 |   /* ith eigenvector should be in row i */
202 |   for (i = 0; i < n; i++) {
203 |     for (j = 0; j < n; j++) {
204 |       evec[i * n + j] = pmat[i * n + j];	/* don't put back in row-major order (?) */
205 |     }
206 |   }
207 |   for (i = 0; i < n * n; i++) {
208 |     pmat[i] = amat[i];
209 |     qmat[i] = bmat[i];
210 |   }
211 | 
212 |   /* LAPACK puts evals and evecs in ascending order           */
213 |   /* reorder evals and evecs so evals are in descending order */
214 | 
215 |   for (i = 0; i < n / 2; i++) {
216 |     double t = eval[i];
217 |     eval[i] = eval[n - 1 - i];
218 |     eval[n - 1 - i] = t;
219 | 
220 |     for (j = 0; j < n; j++) {	/* exchange row i and row(n-1-i) */
221 |       t = evec[i * n + j];
222 |       evec[i * n + j] = evec[(n - 1 - i) * n + j];
223 |       evec[(n - 1 - i) * n + j] = t;
224 |     }
225 |   }
226 | 
227 |   free (amat);
228 |   free (bmat);
229 | 
230 | }
231 | 
232 | void
233 | mkorth (double *orth, double *ww, int n)
234 | // special purpose.  Construct basis of vectors orthogonal to ww
235 | {
236 | 
237 |   double *vv, *evec, *qq;
238 |   double y;
239 | 
240 |   ZALLOC (vv, n * n, double);
241 |   ZALLOC (evec, n * n, double);
242 |   ZALLOC (qq, n, double);
243 | 
244 |   y = asum2 (ww, n);
245 |   vst (qq, ww, 1.0 / sqrt (y), n);
246 |   setidmat (vv, n);
247 |   addouter (vv, qq, n);
248 | 
249 |   eigvecs (vv, orth, evec, n);
250 |   copyarr (evec + n, orth, n * (n - 1));
251 | 
252 |   free (vv);
253 |   free (qq);
254 |   free (evec);
255 | 
256 | }
257 | 


--------------------------------------------------------------------------------
/POPGEN/twtable:
--------------------------------------------------------------------------------
  1 | ### this is  a table of TW using a Runge-Kutta solver suggested by Per-Olaf Persson 
  2 | ### algorithm coded in C by NP using NAG ODE solver
  3 |     -8.000 1.000000000 0.000000000 
  4 |     -7.900 1.000000000 0.000000000 
  5 |     -7.800 1.000000000 0.000000000 
  6 |     -7.700 1.000000000 0.000000000 
  7 |     -7.600 1.000000000 0.000000000 
  8 |     -7.500 1.000000000 0.000000001 
  9 |     -7.400 1.000000000 0.000000002 
 10 |     -7.300 0.999999999 0.000000005 
 11 |     -7.200 0.999999999 0.000000010 
 12 |     -7.100 0.999999997 0.000000019 
 13 |     -7.000 0.999999995 0.000000039 
 14 |     -6.900 0.999999989 0.000000076 
 15 |     -6.800 0.999999978 0.000000146 
 16 |     -6.700 0.999999958 0.000000276 
 17 |     -6.600 0.999999920 0.000000511 
 18 |     -6.500 0.999999849 0.000000932 
 19 |     -6.400 0.999999723 0.000001670 
 20 |     -6.300 0.999999498 0.000002942 
 21 |     -6.200 0.999999105 0.000005097 
 22 |     -6.100 0.999998431 0.000008683 
 23 |     -6.000 0.999997293 0.000014554 
 24 |     -5.900 0.999995401 0.000024005 
 25 |     -5.800 0.999992309 0.000038969 
 26 |     -5.700 0.999987331 0.000062279 
 27 |     -5.600 0.999979441 0.000098012 
 28 |     -5.500 0.999967125 0.000151923 
 29 |     -5.400 0.999948187 0.000231995 
 30 |     -5.300 0.999919496 0.000349097 
 31 |     -5.200 0.999876655 0.000517756 
 32 |     -5.100 0.999813597 0.000757035 
 33 |     -5.000 0.999722082 0.001091485 
 34 |     -4.900 0.999591101 0.001552137 
 35 |     -4.800 0.999406175 0.002177466 
 36 |     -4.700 0.999148569 0.003014256 
 37 |     -4.600 0.998794427 0.004118267 
 38 |     -4.500 0.998313849 0.005554591 
 39 |     -4.400 0.997669962 0.007397591 
 40 |     -4.300 0.996818016 0.009730295 
 41 |     -4.200 0.995704571 0.012643159 
 42 |     -4.100 0.994266851 0.016232112 
 43 |     -4.000 0.992432322 0.020595851 
 44 |     -3.900 0.990118582 0.025832397 
 45 |     -3.800 0.987233631 0.032034971 
 46 |     -3.700 0.983676579 0.039287325 
 47 |     -3.600 0.979338843 0.047658716 
 48 |     -3.500 0.974105853 0.057198759 
 49 |     -3.400 0.967859270 0.067932445 
 50 |     -3.300 0.960479677 0.079855636 
 51 |     -3.200 0.951849687 0.092931337 
 52 |     -3.100 0.941857369 0.107087044 
 53 |     -3.000 0.930399881 0.122213418 
 54 |     -2.900 0.917387157 0.138164458 
 55 |     -2.800 0.902745495 0.154759279 
 56 |     -2.700 0.886420892 0.171785501 
 57 |     -2.600 0.868381957 0.189004169 
 58 |     -2.500 0.848622271 0.206156009 
 59 |     -2.400 0.827162053 0.222968755 
 60 |     -2.300 0.804049066 0.239165233 
 61 |     -2.200 0.779358684 0.254471803 
 62 |     -2.100 0.753193114 0.268626779 
 63 |     -2.000 0.725679802 0.281388431 
 64 |     -1.900 0.696969061 0.292542221 
 65 |     -1.800 0.667231036 0.301906945 
 66 |     -1.700 0.636652122 0.309339558 
 67 |     -1.600 0.605430961 0.314738516 
 68 |     -1.500 0.573774198 0.318045543 
 69 |     -1.400 0.541892124 0.319245849 
 70 |     -1.300 0.509994383 0.318366852 
 71 |     -1.200 0.478285870 0.315475570 
 72 |     -1.100 0.446962951 0.310674866 
 73 |     -1.000 0.416210105 0.304098784 
 74 |     -0.900 0.386197065 0.295907232 
 75 |     -0.800 0.357076521 0.286280263 
 76 |     -0.700 0.328982392 0.275412215 
 77 |     -0.600 0.302028689 0.263505933 
 78 |     -0.500 0.276308949 0.250767272 
 79 |     -0.400 0.251896179 0.237400053 
 80 |     -0.300 0.228843301 0.223601597 
 81 |     -0.200 0.207183986 0.209558915 
 82 |     -0.100 0.186933854 0.195445624 
 83 |      0.000 0.168091934 0.181419571 
 84 |      0.100 0.150642330 0.167621190 
 85 |      0.200 0.134556018 0.154172511 
 86 |      0.300 0.119792709 0.141176787 
 87 |      0.400 0.106302721 0.128718659 
 88 |      0.500 0.094028817 0.116864772 
 89 |      0.600 0.082907953 0.105664756 
 90 |      0.700 0.072872924 0.095152500 
 91 |      0.800 0.063853860 0.085347620 
 92 |      0.900 0.055779577 0.076257058 
 93 |      1.000 0.048578763 0.067876743 
 94 |      1.100 0.042180992 0.060193257 
 95 |      1.200 0.036517582 0.053185457 
 96 |      1.300 0.031522284 0.046826015 
 97 |      1.400 0.027131832 0.041082856 
 98 |      1.500 0.023286351 0.035920459 
 99 |      1.600 0.019929640 0.031301023 
100 |      1.700 0.017009350 0.027185487 
101 |      1.800 0.014477062 0.023534398 
102 |      1.900 0.012288293 0.020308645 
103 |      2.000 0.010402429 0.017470054 
104 |      2.100 0.008782605 0.014981856 
105 |      2.200 0.007395547 0.012809046 
106 |      2.300 0.006211384 0.010918644 
107 |      2.400 0.005203434 0.009279861 
108 |      2.500 0.004347977 0.007864200 
109 |      2.600 0.003624031 0.006645482 
110 |      2.700 0.003013114 0.005599836 
111 |      2.800 0.002499018 0.004705636 
112 |      2.900 0.002067590 0.003943413 
113 |      3.000 0.001706520 0.003295741 
114 |      3.100 0.001405143 0.002747112 
115 |      3.200 0.001154255 0.002283795 
116 |      3.300 0.000945945 0.001893694 
117 |      3.400 0.000773431 0.001566204 
118 |      3.500 0.000630927 0.001292071 
119 |      3.600 0.000513508 0.001063253 
120 |      3.700 0.000416999 0.000872795 
121 |      3.800 0.000337871 0.000714702 
122 |      3.900 0.000273152 0.000583831 
123 |      4.000 0.000220344 0.000475784 
124 |      4.100 0.000177359 0.000386816 
125 |      4.200 0.000142452 0.000313749 
126 |      4.300 0.000114170 0.000253894 
127 |      4.400 0.000091308 0.000204987 
128 |      4.500 0.000072871 0.000165125 
129 |      4.600 0.000058035 0.000132716 
130 |      4.700 0.000046124 0.000106431 
131 |      4.800 0.000036582 0.000085163 
132 |      4.900 0.000028955 0.000067996 
133 |      5.000 0.000022872 0.000054172 
134 |      5.100 0.000018030 0.000043066 
135 |      5.200 0.000014185 0.000034164 
136 |      5.300 0.000011138 0.000027045 
137 |      5.400 0.000008728 0.000021365 
138 |      5.500 0.000006826 0.000016843 
139 |      5.600 0.000005328 0.000013250 
140 |      5.700 0.000004151 0.000010403 
141 |      5.800 0.000003228 0.000008151 
142 |      5.900 0.000002505 0.000006374 
143 |      6.000 0.000001941 0.000004974 
144 |      6.100 0.000001501 0.000003874 
145 |      6.200 0.000001158 0.000003011 
146 |      6.300 0.000000892 0.000002336 
147 |      6.400 0.000000686 0.000001809 
148 |      6.500 0.000000527 0.000001398 
149 |      6.600 0.000000403 0.000001078 
150 |      6.700 0.000000308 0.000000830 
151 |      6.800 0.000000235 0.000000638 
152 |      6.900 0.000000179 0.000000489 
153 |      7.000 0.000000136 0.000000375 
154 |      7.100 0.000000104 0.000000286 
155 |      7.200 0.000000079 0.000000218 
156 |      7.300 0.000000059 0.000000166 
157 |      7.400 0.000000045 0.000000126 
158 |      7.500 0.000000034 0.000000096 
159 |      7.600 0.000000025 0.000000073 
160 |      7.700 0.000000019 0.000000055 
161 |      7.800 0.000000014 0.000000041 
162 |      7.900 0.000000011 0.000000031 
163 |      8.000 0.000000008 0.000000023 
164 | 


--------------------------------------------------------------------------------
/src/smarttables/twtable:
--------------------------------------------------------------------------------
  1 | ### this is  a table of TW using a Runge-Kutta solver suggested by Per-Olaf Persson 
  2 | ### algorithm coded in C by NP using NAG ODE solver
  3 | ###  arg    r tail       pdf
  4 |     -8.000 1.000000000 0.000000000 
  5 |     -7.900 1.000000000 0.000000000 
  6 |     -7.800 1.000000000 0.000000000 
  7 |     -7.700 1.000000000 0.000000000 
  8 |     -7.600 1.000000000 0.000000000 
  9 |     -7.500 1.000000000 0.000000001 
 10 |     -7.400 1.000000000 0.000000002 
 11 |     -7.300 0.999999999 0.000000005 
 12 |     -7.200 0.999999999 0.000000010 
 13 |     -7.100 0.999999997 0.000000019 
 14 |     -7.000 0.999999995 0.000000039 
 15 |     -6.900 0.999999989 0.000000076 
 16 |     -6.800 0.999999978 0.000000146 
 17 |     -6.700 0.999999958 0.000000276 
 18 |     -6.600 0.999999920 0.000000511 
 19 |     -6.500 0.999999849 0.000000932 
 20 |     -6.400 0.999999723 0.000001670 
 21 |     -6.300 0.999999498 0.000002942 
 22 |     -6.200 0.999999105 0.000005097 
 23 |     -6.100 0.999998431 0.000008683 
 24 |     -6.000 0.999997293 0.000014554 
 25 |     -5.900 0.999995401 0.000024005 
 26 |     -5.800 0.999992309 0.000038969 
 27 |     -5.700 0.999987331 0.000062279 
 28 |     -5.600 0.999979441 0.000098012 
 29 |     -5.500 0.999967125 0.000151923 
 30 |     -5.400 0.999948187 0.000231995 
 31 |     -5.300 0.999919496 0.000349097 
 32 |     -5.200 0.999876655 0.000517756 
 33 |     -5.100 0.999813597 0.000757035 
 34 |     -5.000 0.999722082 0.001091485 
 35 |     -4.900 0.999591101 0.001552137 
 36 |     -4.800 0.999406175 0.002177466 
 37 |     -4.700 0.999148569 0.003014256 
 38 |     -4.600 0.998794427 0.004118267 
 39 |     -4.500 0.998313849 0.005554591 
 40 |     -4.400 0.997669962 0.007397591 
 41 |     -4.300 0.996818016 0.009730295 
 42 |     -4.200 0.995704571 0.012643159 
 43 |     -4.100 0.994266851 0.016232112 
 44 |     -4.000 0.992432322 0.020595851 
 45 |     -3.900 0.990118582 0.025832397 
 46 |     -3.800 0.987233631 0.032034971 
 47 |     -3.700 0.983676579 0.039287325 
 48 |     -3.600 0.979338843 0.047658716 
 49 |     -3.500 0.974105853 0.057198759 
 50 |     -3.400 0.967859270 0.067932445 
 51 |     -3.300 0.960479677 0.079855636 
 52 |     -3.200 0.951849687 0.092931337 
 53 |     -3.100 0.941857369 0.107087044 
 54 |     -3.000 0.930399881 0.122213418 
 55 |     -2.900 0.917387157 0.138164458 
 56 |     -2.800 0.902745495 0.154759279 
 57 |     -2.700 0.886420892 0.171785501 
 58 |     -2.600 0.868381957 0.189004169 
 59 |     -2.500 0.848622271 0.206156009 
 60 |     -2.400 0.827162053 0.222968755 
 61 |     -2.300 0.804049066 0.239165233 
 62 |     -2.200 0.779358684 0.254471803 
 63 |     -2.100 0.753193114 0.268626779 
 64 |     -2.000 0.725679802 0.281388431 
 65 |     -1.900 0.696969061 0.292542221 
 66 |     -1.800 0.667231036 0.301906945 
 67 |     -1.700 0.636652122 0.309339558 
 68 |     -1.600 0.605430961 0.314738516 
 69 |     -1.500 0.573774198 0.318045543 
 70 |     -1.400 0.541892124 0.319245849 
 71 |     -1.300 0.509994383 0.318366852 
 72 |     -1.200 0.478285870 0.315475570 
 73 |     -1.100 0.446962951 0.310674866 
 74 |     -1.000 0.416210105 0.304098784 
 75 |     -0.900 0.386197065 0.295907232 
 76 |     -0.800 0.357076521 0.286280263 
 77 |     -0.700 0.328982392 0.275412215 
 78 |     -0.600 0.302028689 0.263505933 
 79 |     -0.500 0.276308949 0.250767272 
 80 |     -0.400 0.251896179 0.237400053 
 81 |     -0.300 0.228843301 0.223601597 
 82 |     -0.200 0.207183986 0.209558915 
 83 |     -0.100 0.186933854 0.195445624 
 84 |      0.000 0.168091934 0.181419571 
 85 |      0.100 0.150642330 0.167621190 
 86 |      0.200 0.134556018 0.154172511 
 87 |      0.300 0.119792709 0.141176787 
 88 |      0.400 0.106302721 0.128718659 
 89 |      0.500 0.094028817 0.116864772 
 90 |      0.600 0.082907953 0.105664756 
 91 |      0.700 0.072872924 0.095152500 
 92 |      0.800 0.063853860 0.085347620 
 93 |      0.900 0.055779577 0.076257058 
 94 |      1.000 0.048578763 0.067876743 
 95 |      1.100 0.042180992 0.060193257 
 96 |      1.200 0.036517582 0.053185457 
 97 |      1.300 0.031522284 0.046826015 
 98 |      1.400 0.027131832 0.041082856 
 99 |      1.500 0.023286351 0.035920459 
100 |      1.600 0.019929640 0.031301023 
101 |      1.700 0.017009350 0.027185487 
102 |      1.800 0.014477062 0.023534398 
103 |      1.900 0.012288293 0.020308645 
104 |      2.000 0.010402429 0.017470054 
105 |      2.100 0.008782605 0.014981856 
106 |      2.200 0.007395547 0.012809046 
107 |      2.300 0.006211384 0.010918644 
108 |      2.400 0.005203434 0.009279861 
109 |      2.500 0.004347977 0.007864200 
110 |      2.600 0.003624031 0.006645482 
111 |      2.700 0.003013114 0.005599836 
112 |      2.800 0.002499018 0.004705636 
113 |      2.900 0.002067590 0.003943413 
114 |      3.000 0.001706520 0.003295741 
115 |      3.100 0.001405143 0.002747112 
116 |      3.200 0.001154255 0.002283795 
117 |      3.300 0.000945945 0.001893694 
118 |      3.400 0.000773431 0.001566204 
119 |      3.500 0.000630927 0.001292071 
120 |      3.600 0.000513508 0.001063253 
121 |      3.700 0.000416999 0.000872795 
122 |      3.800 0.000337871 0.000714702 
123 |      3.900 0.000273152 0.000583831 
124 |      4.000 0.000220344 0.000475784 
125 |      4.100 0.000177359 0.000386816 
126 |      4.200 0.000142452 0.000313749 
127 |      4.300 0.000114170 0.000253894 
128 |      4.400 0.000091308 0.000204987 
129 |      4.500 0.000072871 0.000165125 
130 |      4.600 0.000058035 0.000132716 
131 |      4.700 0.000046124 0.000106431 
132 |      4.800 0.000036582 0.000085163 
133 |      4.900 0.000028955 0.000067996 
134 |      5.000 0.000022872 0.000054172 
135 |      5.100 0.000018030 0.000043066 
136 |      5.200 0.000014185 0.000034164 
137 |      5.300 0.000011138 0.000027045 
138 |      5.400 0.000008728 0.000021365 
139 |      5.500 0.000006826 0.000016843 
140 |      5.600 0.000005328 0.000013250 
141 |      5.700 0.000004151 0.000010403 
142 |      5.800 0.000003228 0.000008151 
143 |      5.900 0.000002505 0.000006374 
144 |      6.000 0.000001941 0.000004974 
145 |      6.100 0.000001501 0.000003874 
146 |      6.200 0.000001158 0.000003011 
147 |      6.300 0.000000892 0.000002336 
148 |      6.400 0.000000686 0.000001809 
149 |      6.500 0.000000527 0.000001398 
150 |      6.600 0.000000403 0.000001078 
151 |      6.700 0.000000308 0.000000830 
152 |      6.800 0.000000235 0.000000638 
153 |      6.900 0.000000179 0.000000489 
154 |      7.000 0.000000136 0.000000375 
155 |      7.100 0.000000104 0.000000286 
156 |      7.200 0.000000079 0.000000218 
157 |      7.300 0.000000059 0.000000166 
158 |      7.400 0.000000045 0.000000126 
159 |      7.500 0.000000034 0.000000096 
160 |      7.600 0.000000025 0.000000073 
161 |      7.700 0.000000019 0.000000055 
162 |      7.800 0.000000014 0.000000041 
163 |      7.900 0.000000011 0.000000031 
164 |      8.000 0.000000008 0.000000023 
165 | 


--------------------------------------------------------------------------------
/bin/ploteig:
--------------------------------------------------------------------------------
  1 | #!/usr/local/bin/perl  -w 
  2 | 
  3 | ### ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-g gstem] [-o outfile] [-x] [-k]  [-y] [-z sep] -r colorstring -m xmul -n ymul 
  4 | use Getopt::Std ;
  5 | use File::Basename ;
  6 | 
  7 | ## pops : separated  -x = make postscript and pdf  -z use another separator
  8 | ##  -k keep intermediate files
  9 | ## NEW if pops is a file names are read one per line
 10 | ## scaling on x, y axes 
 11 | 
 12 | getopts('i:o:p:c:s:d:z:t:r:m:n:g:xkyq',\%opts) ;
 13 | $postscmode = $opts{"x"} ;
 14 | $oldkeystyle =  $opts{"y"} ;
 15 | $nopoptitle = $opts{"q"} ;
 16 | $kflag = $opts{"k"} ;
 17 | $keepflag = 1 if ($kflag) ;
 18 | $keepflag = 1 unless ($postscmode) ;
 19 | 
 20 | $zsep = ":" ;
 21 | if (defined $opts{"z"}) {
 22 |  $zsep = $opts{"z"} ;
 23 |  $zsep = "\+" if ($zsep eq "+") ;
 24 | }
 25 | 
 26 | if (defined $opts{"r"}) {
 27 |  $colorstr = $opts{"r"} ;
 28 |  setcolor($colorstr) ;
 29 | }
 30 | $xmul = $opts{"m"} ; 
 31 | $xmul = 1 unless (defined $xmul) ;
 32 | 
 33 | $ymul = $opts{"n"} ; 
 34 | $ymul = 1 unless (defined $ymul) ;
 35 | 
 36 | $title = "" ;
 37 | if (defined $opts{"t"}) {
 38 |  $title = $opts{"t"} ;
 39 | }
 40 | if (defined $opts{"i"}) {
 41 |  $infile = $opts{"i"} ;
 42 | }
 43 | else {
 44 |  usage() ;
 45 |  exit 0 ;
 46 | }
 47 | 
 48 | open (FF, $infile) || die "can't open $infile\n" ;
 49 | @L = (<FF>) ;
 50 | chomp @L ;
 51 | $nf = 0 ;
 52 | foreach $line (@L) { 
 53 |  next if ($line =~ /\#/) ;
 54 |  @Z = split " ", $line ;
 55 |  $x = @Z ;
 56 |  $nf = $x if ($nf < $x) ;
 57 | }
 58 | printf "## number of fields: %d\n", $nf ;
 59 | $popcol = $nf-1 ;
 60 | 
 61 | 
 62 | if (defined $opts{"p"}) {
 63 |  $pops = $opts{"p"} ;
 64 | }
 65 | else {
 66 |  die "p parameter compulsory\n" ;
 67 | }
 68 | 
 69 | $popsname = setpops ($pops) ;
 70 | print "$popsname\n" ;
 71 | 
 72 | $c1 = 1; $c2 =2 ;
 73 | if (defined $opts{"c"}) {
 74 |  $cols = $opts{"c"} ;
 75 |  ($c1, $c2) = split ":", $cols ;
 76 |  die "bad c param: $cols\n" unless (defined $cols) ;
 77 | }
 78 | 
 79 | $stem = "$infile.$c1:$c2" ;
 80 | if (defined $opts{"s"}) {
 81 |  $stem = $opts{"s"} ;
 82 | }
 83 | 
 84 | if (defined $opts{"g"}) {
 85 |  $stem = $gstem = $opts{"g"} ;
 86 | }
 87 | 
 88 | $gstem = "$stem.$popsname" unless (defined $gstem) ; 
 89 | $gnfile = "$gstem.xtxt" ;
 90 |  
 91 | if (defined $opts{"o"}) {
 92 |  $gnfile = $opts{"o"} ;
 93 | }
 94 | 
 95 | 
 96 | @T = () ; ## trash 
 97 | open (GG, ">$gnfile") || die "can't open $gnfile\n" ;
 98 | print GG "## " unless ($postscmode) ;
 99 | print GG "set terminal postscript color noenhanced\n" ;
100 | print GG "set title  \"$title\" \n" ; 
101 | print GG "set key outside\n" unless ($oldkeystyle) ; 
102 | print GG "set xlabel  \"eigenvector $c1\" \n" if ($xmul == 1) ; 
103 | print GG "set xlabel  \"eigenvector $c1 (* $xmul) \" \n" if ($xmul != 1) ; 
104 | print GG "set ylabel  \"eigenvector $c2\" \n" if ($ymul == 1) ; 
105 | print GG "set ylabel  \"eigenvector $c1 (* $ymul) \" \n" if ($ymul != 1) ; 
106 | print GG "plot " ;
107 | $np = @P ;
108 | $lastpop = $P[$np-1] ;
109 | $d1 = $c1+1 ;
110 | $d2 = $c2+1 ;
111 | foreach $pop (@P)  { 
112 |  $dfile = "$stem:$pop" ;
113 |  push @T, $dfile ;
114 |  print GG " \"$dfile\" using (\$$d1)*$xmul:(\$$d2)*$ymul "  ;
115 |  print GG "notitle "  if (defined $nopoptitle) ;
116 |  print GG "title \"$pop\" "  unless (defined $nopoptitle) ;
117 |  if (defined $COL{$pop}) { 
118 |   $color = $COL{$pop} ; 
119 |   print GG "lt rgb \"$color\" " ;
120 |  }
121 |  print GG ", \\\n" unless ($pop eq $lastpop) ;
122 |  open (YY, ">$dfile") || die "can't open $dfile\n" ;
123 |  foreach $line (@L) {
124 |   next if ($line =~ /\#/) ;
125 |   @Z = split " ", $line ;
126 |   next unless (defined $Z[$popcol]) ;
127 |   next unless ($Z[$popcol] eq $pop) ;
128 |   print YY "$line\n" ;
129 |  }
130 |  close YY ;
131 | }
132 | print GG "\n" ;
133 | print GG "## "  if ($postscmode) ;
134 | print GG "pause 9999\n"  ;
135 | close GG ;
136 | 
137 | if ($postscmode) { 
138 | $psfile = "$stem.ps" ;
139 | 
140 |  if ($gnfile =~ /xtxt/) { 
141 |   $psfile = $gnfile ;
142 |   $psfile  =~ s/xtxt/ps/ ;
143 |  }
144 | system "gnuplot < $gnfile > $psfile" ;
145 | system "/home/np29/bin/fixgreen  $psfile" ;
146 | system "ps2pdf  $psfile " ;
147 | }
148 | unlink (@T) unless $keepflag ;
149 | 
150 | sub setcolor {
151 |  my ($colorst) = @_ ; 
152 |  local ($cp, $pop, $color, @CP, $line) ;
153 |  if (-r $colorst) { 
154 |    open (C1, $colorst) || die "can't open $colorst\n" ;
155 |    foreach $line (<C1>) { 
156 |     chomp $line ;
157 |     ($pop, $color) = split " ", $line ; 
158 |     next if ($pop =~ /\#/) ;
159 |     next unless (defined $color) ;
160 |     print STDERR  "setting color for $pop to $color\n" ; 
161 |     $COL{$pop} = $color ;
162 |    }
163 |    close C1 ; 
164 |    return ;
165 |  }
166 | 
167 |  @CP = split " ", $colorst ;
168 |  foreach $cp (@CP) { 
169 |   ($pop, $color) = split ":", $cp ;
170 |   $COL{$pop} = $color ;
171 |  }
172 | }
173 | 
174 | sub usage { 
175 |  
176 | print "ploteig -i eigfile -p pops -c a:b [-t title] [-s stem] [-o outfile] [-x] [-k] -c colorstringh [-m xmul] [-n ymul]\n" ;  
177 | print "-i eigfile     input file first col indiv-id last col population\n" ;
178 | print "## as output by smartpca in outputvecs \n" ;
179 | print "-c a:b         a, b columns to plot.  1:2 would be common and leading 2 eigenvectors\n" ;
180 | print "-p pops        Populations to plot.  : delimited.   eg  -p Bantu:San:French\n" ;
181 | print "## pops can also be a filename.  List populations 1 per line\n" ;
182 | print "[-s stem]      stem will start various output files\n"  ;
183 | print "[-o ofile]     ofile will be gnuplot control file.  Should have xtxt suffix\n"; 
184 | print "[-x]           make ps and pdf files\n" ; 
185 | print "[-k]           keep various intermediate files although  -x set\n" ;
186 | print "## necessary if .xtxt file is to be hand edited\n" ;
187 | print "[-r colorstringpairs or colorstringfile]\n" ;
188 | print "[-g gstem]     make gstem.xtxt gstem.ps  gstem.pdf  \n" ;
189 | print "[-y]           put key at top right inside box (old mode)\n" ;
190 | print "[-t]           title (legend)\n" ;
191 | 
192 | print "The xtxt file is a gnuplot file and can be easily hand edited.  Intermediate files
193 | needed if you want to make your own plot\n" ;
194 | 
195 | }
196 | sub setpops {      
197 |  my ($pops) = @_  ; 
198 |  local (@a, $d, $b, $e) ; 
199 | 
200 |  if (-e $pops) {  
201 |   open (FF1, $pops) || die "can't open $pops\n" ;
202 |   @P = () ;
203 |   foreach $line (<FF1>) { 
204 |   ($a) = split " ", $line ;
205 |   next unless (defined $a) ;
206 |   next if ($a =~ /\#/) ;
207 |   push  @P, $a ;
208 |   }
209 |   $out = join ":", @P ; 
210 |   print "## pops: $out\n" ;
211 |   ($b, $d , $e) = fileparse($pops) ;
212 |   return $b ;
213 |  }
214 |  @P = split $zsep, $pops ;
215 |  return $pops ;
216 | 
217 | }
218 | 


--------------------------------------------------------------------------------
/include/admutils.h:
--------------------------------------------------------------------------------
  1 | #include <ctype.h>
  2 | #include <nicklib.h>
  3 | #include <xsearch.h>
  4 | #define IDSIZE 40
  5 | 
  6 | #ifndef ADMUTILS
  7 | 
  8 | typedef struct
  9 | {
 10 |   char ID[IDSIZE];
 11 |   char *egroup;
 12 |   char gender;			/* 'M' or 'F' */
 13 |   double theta_mode;		/* most likely theta on mesh */
 14 |   double lambda_mode;		/* mean of log10(lambda) from probability distribution */
 15 | 
 16 |   double Xtheta_mode;		/* most likely theta on mesh */
 17 |   double Xlambda_mode;		/* most likely lambda on mesh */
 18 | 
 19 |   int idnum;
 20 |   int affstatus;		/* affected status */
 21 |   int ignore;			/* YES => do not use */
 22 |   int flag;
 23 |   double thetatrue;
 24 |   double Xthetatrue;
 25 |   double lambdatrue;
 26 |   double Xlambdatrue;
 27 |   double totgamms[3];
 28 |   double totscore;
 29 |   double rawqval;
 30 |   double qval;
 31 | } Indiv;
 32 | 
 33 | typedef struct
 34 | {
 35 |   char ID[IDSIZE];
 36 |   int chrom;
 37 |   char cchrom[6];
 38 |   double genpos;
 39 |   double physpos;
 40 |   double aa_cauc_freq;		/* frequencies in ancestral pop to AA */
 41 |   double aa_af_freq;
 42 |   double cauc_freq;		/* frequencies of variant allele */
 43 |   double af_freq;
 44 |   double cftrue;
 45 |   double aftrue;
 46 |   double aa_cftrue;
 47 |   double aa_aftrue;
 48 |   int markernum;		/* marker number */
 49 |   char *pbuff;
 50 |   char *ebuff;			/* for random ethnic path */
 51 |   int isfake;			/* 1 if fake marker  else 0 */
 52 |   int isrfake;
 53 | /* real marker treated as fake.  Used for 2D scoring */
 54 |   int ignore;
 55 |   int ngtypes;			/*    number of gtypes */
 56 |   int *gtypes;
 57 |   int af_nn[2];
 58 |   int cauc_nn[2];
 59 |   double *modelscores;
 60 |   double *totmodelscores;
 61 |   double score;
 62 |   double weight;
 63 |   double estgenpos;
 64 |   double estdis;
 65 |   double dis;
 66 |   double esum;
 67 |   double lsum;
 68 |   double gpsum;
 69 |   double gpnum;
 70 |   void *pcupt;
 71 |   int tagnumber;
 72 |   char alleles[2];
 73 |   int chimpfudge;
 74 | } SNP;
 75 | 
 76 | typedef struct
 77 | {
 78 |   char ID[IDSIZE];
 79 |   Indiv *father;
 80 |   Indiv *mother;
 81 |   Indiv *child;
 82 |   int findex;
 83 |   int mindex;
 84 |   int cindex;
 85 |   int base;
 86 | } TRIO;
 87 | // ?index into original Indiv array.  base is work variable used in phasetrio to store init index in new array
 88 | 
 89 | typedef struct
 90 | {
 91 |   char gname[IDSIZE];
 92 |   SNP **snpm;
 93 |   Indiv **indivm;
 94 |   int numsnps;
 95 |   int numindivs;
 96 |   int rlen;
 97 |   int fdes;
 98 |   int snpindex;
 99 |   unsigned char *buff;
100 | } genofile;
101 | 
102 | typedef struct
103 | {
104 |   double xd[4];
105 |   double xc[9];
106 |   double ap1;
107 |   double ap2;
108 |   double cp1;
109 |   double cp2;
110 |   double rpowersum;
111 |   double crpowersum;
112 |   double gammasum[2];
113 |   double gammanum[2];
114 |   int pubfmodern;
115 | } SNPMC;
116 | // gammasum for cases/controls
117 | #endif
118 | #define ADMUTILS
119 | 
120 | void loadstats (FILE * statsfile, Indiv * indiv_array, int *numindivs);
121 | void loadXstats (FILE * Xstatsfile, Indiv * indiv_array, int numindivs,
122 | 		 int *numloaded);
123 | 
124 | void sett1 (double *tt, double theta, int numstates);
125 | void sett1r (double *tt, double theta, int numstate, double risks);
126 | void gettln (SNP * cupt, Indiv * indx,
127 | 	     double *ptheta, double *plambda, int *pnumstates, int *pignore);
128 | 
129 | void puttln (SNP * cupt, Indiv * indx, double theta, double lambda);
130 | 
131 | 
132 | /* UTILITY FUNCTIONS */
133 | 
134 | int countcol (char *fname);
135 | int countcolumns (FILE * fp);
136 | 
137 | void fataly (const char *name);
138 | int compare_doubles (const void *a, const void *b);
139 | 
140 | void pcheck (char *name, char x);
141 | void printm (double **M, int numstates);
142 | int numvalids (Indiv * indx, SNP ** snpmarkers, int fc, int lc);
143 | void gethpos (int *fc, int *lc, SNP ** snpm, int numsnps,
144 | 	      int xchrom, int lo, int hi);
145 | int numvalidgtypes (SNP * cupt);
146 | double malefreq (Indiv ** indivmarkers, int numindivs);
147 | int isimatch (int a, int b);
148 | void makedir (char *dirname);
149 | int indxindex (char **namelist, int len, char *strid);
150 | int indindex (Indiv ** indivmarkers, int numindivs, char *indid);
151 | int snpindex (SNP ** snpmarkers, int numsnps, char *snpid);
152 | void inddupcheck (Indiv ** indivmarkers, int numindivs);
153 | void freesnpindex ();
154 | int ignoresnp (SNP * cupt);
155 | double entrop (double *a, int n);
156 | double xxlog2 (double t);
157 | void testnan (double *a, int n);
158 | void hap2dip (SNP * cupt);
159 | void flipalleles (SNP * cupt);
160 | void flipalleles_phased (SNP * cupt);
161 | int getgtypes (SNP * cupt, int k);
162 | void putgtypes (SNP * cupt, int k, int val);
163 | int getep (SNP * cupt, int k);
164 | void putep (SNP * cupt, int k, int val);
165 | int hasharr (char **xarr, int nxarr);
166 | void wbuff (unsigned char *buff, int num, int g);
167 | int rbuff (unsigned char *buff, int num);
168 | int ridfile (char *fname);
169 | double hwcheck (SNP * cupt, double *cc);
170 | double hwcheckx (SNP * cupt, Indiv ** indm, double *cc);
171 | void cntit (double *xc, SNP * cupt1, SNP * cupt2);
172 | int setid2pops (char *idpopstring, Indiv ** indmarkers, int numindivs) ;
173 | 
174 | // dup routines 
175 | void setfastdupnum (int num);
176 | void setfastdupthresh (double thresh, double kill);
177 | void killxhets (SNP ** snpmarkers, Indiv ** indivmarkers, int numsnps,
178 | 		int numindivs);
179 | void fastdupcheck (SNP ** snpmarkers, Indiv ** indivmarkers, int numsnps,
180 | 		   int numindivs);
181 | int grabgtypes (int *gtypes, SNP * cupt, int numindivs);
182 | int kcode (int *w, int len, int base);
183 | void cdup (SNP ** snpm, Indiv ** indm, int nsnp, int *buff, int lbuff);
184 | void printdup (SNP ** snpm, int nsnp, Indiv * inda, Indiv * indb, int nmatch,
185 | 	       int nnomatch);
186 | void killdup (Indiv * inda, Indiv * indb, SNP ** snpm, int nsnp);
187 | double kurtosis (double *a, int n);
188 | int getlist (char *name, char **list);
189 | void printvers (char *progname, char *vers);
190 | int numvalidind (Indiv ** indivmarkers, int numind);
191 | void numvalidgtallind (int *x, SNP ** snpm, int numsnps, int numind);
192 | int numvalidgtind (SNP ** snpm, int numsnps, int ind);
193 | int numvalidgt (Indiv ** indivmarkers, SNP * cupt);
194 | int numvalidgtx (Indiv ** indivmarkers, SNP * cupt, int affst);
195 | int isxmale (SNP * cupt, Indiv * indx);
196 | 
197 | void printmatz (double *ww, char **eglist, int n);
198 | void printmatz5 (double *ww, char **eglist, int n);
199 | void printmatz10 (double *ww, char **eglist, int n);
200 | char *get3 (char *ss);
201 | char *getshort (char *ss, int n);
202 | 
203 | 
204 | #undef max
205 | #define max(A,B)  ((A) > (B) ? (A) : (B))
206 | 
207 | #define MAXNUMR  200
208 | // max number models
209 | 
210 | #define CNULL '\0'
211 | 


--------------------------------------------------------------------------------
/include/mcio.h:
--------------------------------------------------------------------------------
  1 | #ifndef _MCIO_
  2 | #define _MCIO_
  3 | 
  4 | #include <stdio.h>
  5 | #include <string.h>
  6 | #include <ctype.h>
  7 | #include <unistd.h>
  8 | #include <math.h>
  9 | 
 10 | #include <vsubs.h>  
 11 | #include <admutils.h> 
 12 | 
 13 | #define MAXSTR 512 
 14 | #define LONGSTR 10000
 15 | #define MAXFF  200
 16 | #define MAXCH  100
 17 | #define MTCHROM 90 
 18 | #define XYCHROM 91
 19 | #define BADCHROM 99
 20 | #define GDISMUL 1000000
 21 | // multiplier for gdis to make integer for sort
 22 | 
 23 | enum outputmodetype  { 
 24 | ANCESTRYMAP, 
 25 | EIGENSTRAT, 
 26 | PED, 
 27 | PACKEDPED,
 28 | PACKEDANCESTRYMAP  }  ;  
 29 | 
 30 | 
 31 | typedef struct {
 32 |   char ID[IDSIZE];
 33 |   double gpos ; 
 34 |   double ppos ; 
 35 |   int chrom ;
 36 |   char cchrom[6] ;
 37 |   int nn[4] ;
 38 |   int ignore ;
 39 |   int isrfake ; 
 40 |   char alleles[2] ;
 41 |   int inputrow ;
 42 |   int cuptnum ;
 43 |   int chimpfudge ;
 44 | }  SNPDATA ;
 45 | 
 46 | 
 47 | int numfakes(SNPDATA **snpraw, int *snpindx, int nreal, double spacing)  ;
 48 | double nextmesh(double val, double spacing) ;
 49 | double interp (double l, double r, double x, double al, double ar) ;
 50 | 
 51 | int
 52 | loadsnps(SNP **snpm, SNPDATA **snpraw, 
 53 |   int *snpindx, int nreal, double spacing, int *numignore) ;
 54 | 
 55 | int readsnpdata(SNPDATA **snpraw, char *fname)  ; 
 56 | int readinddata(Indiv **indivmarkers, char *fname) ;
 57 | int readindpeddata(Indiv **indivmarkers, char *fname)  ;
 58 | void pedname(char *name, char *sx0, char *sx1) ;
 59 | 
 60 | int readtldata(Indiv **indivmarkers, int numind, char *fname) ;
 61 | int readindval(Indiv **indivmarkers, int numind, char *fname) ;
 62 | int readfreqdata(SNP **snpm, int numsnps, char *fname) ;
 63 | void clearsnp(SNP *cupt) ;
 64 | int rmindivs(SNP **snpm, int numsnps, Indiv **indivmarkers, int numindivs) ;
 65 | int rmsnps(SNP **snpm, int numsnps, char *deletesnpoutname) ;
 66 | void clearind(Indiv **indm, int numind)  ;
 67 | void cleartg(Indiv **indm, int nind)  ;
 68 | 
 69 | double mknn(int *nn, int  n0, int n1) ;
 70 | void clearsnpord() ;
 71 | int getsnps(char *snpfname, SNP ***snpmarkpt, double spacing,
 72 |  char *badsnpname, int *nignore, int numrisks) ;
 73 | int getsizex(char *fname) ;
 74 | int getindivs(char *indivfname, Indiv ***indmarkpt) ;
 75 | 
 76 | void setfamilypopnames(int fpop) ;
 77 | int setstatus(Indiv **indm, int numindivs, char *smatch)  ;
 78 | int setstatusv(Indiv **indm, int numindivs, char *smatch, int val)  ;
 79 | int setstatuslist(Indiv **indm, int numindivs, char **smatchlist, int slen)   ;
 80 | 
 81 | long getgenos(char *genoname, SNP **snpmarkers, Indiv **indivmarkers, 
 82 |  int numsnps, int numindivs, int nignore)  ;
 83 | void getgenos_list(char *genotypelist, SNP **snpmarkers, Indiv **indivmarkers, 
 84 |  int  numsnps, int numindivs, int nignore) ; 
 85 | void printsnps(char *snpoutfilename, SNP **snpm, int num, 
 86 |   Indiv **indm, int printfake, int printvalids) ;
 87 | int checkxval(SNP *cupt, Indiv *indx, int val) ;
 88 | void printdata(char *genooutfilename, char *indoutfilename, 
 89 |   SNP **snpm, Indiv **indiv, int numsnps,int numind, int packmode);
 90 | int readgdata(Indiv **indivmarkers, int numind, char *gname) ; 
 91 | int numvalidind(Indiv **indivmarkers, int  numind)   ;
 92 | int numvalidgtind(SNP **snpm, int numsnps, int ind)  ;
 93 | int numvalidgt(Indiv **indivmarkers, SNP *cupt)   ;
 94 | int numvalidgtx(Indiv **indivmarkers, SNP *cupt, int affst)  ;
 95 | int getweights(char *fname, SNP **snpm, int numsnps)   ;
 96 | int getindvals (char *fname, Indiv ** indivmarkers, int numindivs) ;
 97 | void outpack(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind)  ;
 98 | int ispack(char *gname) ;
 99 | int iseigenstrat(char *gname) ;
100 | void inpack(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind)  ;
101 | int inpack2(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind)  ;
102 | int ineigenstrat(char *genooutfilename, SNP **snpm, Indiv **indiv, int numsnps, int numind)  ;
103 | void setepath(SNP **snpm, int n) ;
104 | void clearepath(char *xpack) ;
105 | long bigread(int fdes, char *packg, long numbytes) ;
106 | 
107 | // pedfile support
108 | int getpedgenos(char *genoname, SNP **snpmarkers, Indiv **indivmarkers, 
109 |  int numsnps, int numindivs, int nignore) ;
110 | void genopedcnt(char *genoname, int **gcounts, int nsnp) ;
111 | 
112 | int pedval(char *sx) ;
113 | int xpedval(char c) ;
114 | int ptoachrom(char *ss) ;
115 | 
116 | void setgref(int **gcounts, int nsnp, int *gvar, int *gref) ;
117 | void cleargdata(SNP **snpmarkers, int numsnps, int numindivs) ;
118 | void setgenotypename(char **gname, char *iname) ;
119 | void settersemode(int mode) ;
120 | 
121 | void dobadsnps(SNPDATA **snpraw, int nreal, char *badsnpname) ;
122 | int snprawindex(SNPDATA **snpraw, int nreal, char *sname) ;
123 | int readsnpmapdata(SNPDATA **snpraw, char *fname)  ;
124 | int checkfake(char *ss)  ;
125 | void setbadpedignore() ;
126 | int setsdpos( SNPDATA *sdpt, int pos) ;  
127 | 
128 | void
129 | outeigenstrat(char *snpname, char *indname, char *gname, 
130 | SNP **snpm, Indiv **indiv, int numsnps, int numind) ;
131 | 
132 | void
133 | outped(char *snpname, char *indname, char *gname, 
134 | SNP **snpm, Indiv **indiv, int numsnps, int numind, int ogmode) ;
135 | 
136 | void
137 | outpackped(char *snpname, char *indname, char *gname, SNP **snpm, Indiv **indiv, 
138 |   int numsnps, int numind, int ogmode) ;
139 | 
140 | void setbedbuff(char *buff, int *gtypes, int numind) ;
141 | int bedval(int g) ;
142 | int str2chrom(char *ss) ;
143 | 
144 | void outindped(char *indname, Indiv **indiv, int numind, int ogmode)  ;
145 | 
146 | void
147 | printmap(char *snpname, SNP **snpm, int numsnps, Indiv **indiv) ;
148 | 
149 | int maxlinelength(char *fname)  ;
150 | int checksize(int numindivs, int numsnps, enum outputmodetype outputmode) ;
151 | 
152 | void setomode(enum outputmodetype *outmode, char *omode)  ;
153 | 
154 | void
155 | outfiles(char *snpname, char *indname, char *gname, SNP **snpm, 
156 |   Indiv **indiv, int numsnps, int numind, int packem, int ogmode) ;
157 | 
158 | void snpdecimate(SNP **snpm, int nsnp, int decim, int mindis, int maxdis)  ;
159 | void decimate(SNP **cbuff, int n, int decim, int mindis, int maxdis) ;
160 | int vvadjust(double *cc, int n, double *pmean) ;
161 | int killhir2(SNP **snpm, int numsnps, int numind, double physlim, double genlim, double rhothresh) ; 
162 | void freecupt(SNP **cupt) ;
163 | void freeped() ;
164 | void cntpops(int *count, Indiv **indm, int numindivs, char **eglist, int numeg) ;
165 | void printalleles(SNP  *cupt, FILE *fff) ;
166 | char *getpackgenos()  ;
167 | void clearpackgenos()  ;
168 | void setchr(int mode) ;
169 | void setchimpmode(int mode) ;
170 | 
171 | int genoopenit(genofile **gfile, char *geno2name, SNP **snp2m, 
172 |   Indiv **indiv2m, int numsnp2, int numindiv2, int nignore)  ;
173 | int genoreadit(genofile *gfile, SNP **pcupt)   ; 
174 | void genocloseit(genofile *gfile)  ; 
175 | 
176 | void putped(int num) ;
177 | void getped(int num) ;
178 | 
179 | void logdeletedsnp(char *snpname, char *cmnt, char *deletesnpoutname);
180 | void sortsnps(SNP **snpa, SNP **snpb, int n) ;
181 | void setpordercheck (int mode) ;
182 | void putsnpordered(int mode) ;
183 | int getsnpordered() ; 
184 | void ckdup(char **eglist, int n) ;
185 | 
186 | 
187 | #endif
188 | 


--------------------------------------------------------------------------------
/src/eigensrc/eigx.c:
--------------------------------------------------------------------------------
  1 | // LAPACK-using version of eigensrc/eigx.f, easier to build on OS X
  2 | // Christopher Chang (chrchang@alumni.caltech.edu), BGI Cognitive Genomics Lab
  3 | 
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | 
  7 | #ifdef __APPLE__
  8 | #include <Accelerate/Accelerate.h>
  9 | #else
 10 | 
 11 | #if __LP64__
 12 | typedef int __CLPK_integer;
 13 | #else
 14 | typedef long int __CLPK_integer;
 15 | #endif
 16 | typedef double __CLPK_doublereal;
 17 | 
 18 | int dspev_ (char *jobz, char *uplo, __CLPK_integer * n,
 19 | 	    __CLPK_doublereal * ap, __CLPK_doublereal * w,
 20 | 	    __CLPK_doublereal * z__, __CLPK_integer * ldz,
 21 | 	    __CLPK_doublereal * work, __CLPK_integer * info);
 22 | 
 23 | int dpotrf_ (char *uplo, __CLPK_integer * n, __CLPK_doublereal * a,
 24 | 	     __CLPK_integer * lda, __CLPK_integer * info);
 25 | 
 26 | int dgetrf_ (__CLPK_integer * m, __CLPK_integer * n, __CLPK_doublereal * a,
 27 | 	     __CLPK_integer * lda, __CLPK_integer * ipiv,
 28 | 	     __CLPK_integer * info);
 29 | 
 30 | int dgetri_ (__CLPK_integer * n, __CLPK_doublereal * a, __CLPK_integer * lda,
 31 | 	     __CLPK_integer * ipiv, __CLPK_doublereal * work,
 32 | 	     __CLPK_integer * lwork, __CLPK_integer * info);
 33 | 
 34 | int dgetrs_ (char *trans, __CLPK_integer * n, __CLPK_integer * nrhs,
 35 | 	     __CLPK_doublereal * a, __CLPK_integer * lda,
 36 | 	     __CLPK_integer * ipiv, __CLPK_doublereal * b,
 37 | 	     __CLPK_integer * ldb, __CLPK_integer * info);
 38 | 
 39 | int dsygv_ (__CLPK_integer * itype, char *jobz, char *uplo,
 40 | 	    __CLPK_integer * n, __CLPK_doublereal * a, __CLPK_integer * lda,
 41 | 	    __CLPK_doublereal * b, __CLPK_integer * ldb,
 42 | 	    __CLPK_doublereal * w, __CLPK_doublereal * work,
 43 | 	    __CLPK_integer * lwork, __CLPK_integer * info);
 44 | #endif // end !__APPLE__
 45 | 
 46 | void
 47 | mem_error ()
 48 | {
 49 |   fprintf (stderr, "CM\n");
 50 |   exit (1);
 51 | }
 52 | 
 53 | void
 54 | inverse_error (char *procname, int info)
 55 | {
 56 |   if (info < 0) {
 57 |     fprintf (stderr, "error (%s): illegal argument %d\n", procname, -info);
 58 |   }
 59 |   else {
 60 |     fprintf (stderr, "error (%s): singular matrix %d\n", procname, info);
 61 |   }
 62 |   exit (1);
 63 | }
 64 | 
 65 | void
 66 | eigx_ (double *pmat, double *ev, __CLPK_integer * n)
 67 | {
 68 |   char jobz = 'N';
 69 |   char uplo = 'L';
 70 |   __CLPK_integer ldz = *n;
 71 |   __CLPK_integer info;
 72 |   double *z;
 73 |   double *work;
 74 |   z = (double *) malloc (ldz * ldz * sizeof (double));
 75 |   if (!z) {
 76 |     mem_error ();
 77 |   }
 78 |   work = (double *) malloc (3 * ldz * sizeof (double));
 79 |   if (!work) {
 80 |     free (z);
 81 |     mem_error ();
 82 |   }
 83 |   dspev_ (&jobz, &uplo, n, pmat, ev, z, &ldz, work, &info);
 84 |   free (z);
 85 |   free (work);
 86 |   if (info) {
 87 | #if __LP64__
 88 |     fprintf (stderr, "INFO: %d\n", info);
 89 | #else
 90 |     fprintf (stderr, "INFO: %ld\n", info);
 91 | #endif
 92 |     exit (1);
 93 |   }
 94 | }
 95 | 
 96 | void
 97 | eigxv_ (double *pmat, double *eval, double *evec, __CLPK_integer * n)
 98 | {
 99 |   char jobz = 'V';
100 |   char uplo = 'L';
101 |   __CLPK_integer ldz = *n;
102 |   __CLPK_integer info;
103 |   double *work = (double *) malloc (3 * ldz * sizeof (double));
104 |   if (!work) {
105 |     mem_error ();
106 |   }
107 |   dspev_ (&jobz, &uplo, n, pmat, eval, evec, &ldz, work, &info);
108 |   free (work);
109 |   if (info) {
110 | #if __LP64__
111 |     fprintf (stderr, "INFO: %d\n", info);
112 | #else
113 |     fprintf (stderr, "INFO: %ld\n", info);
114 | #endif
115 |     exit (1);
116 |   }
117 | }
118 | 
119 | void
120 | cdc_ (double *pmat, __CLPK_integer * n)
121 | {
122 |   char uplo = 'L';
123 |   __CLPK_integer lda = *n;
124 |   __CLPK_integer info;
125 |   dpotrf_ (&uplo, n, pmat, &lda, &info);
126 |   if (info) {
127 |     if (info < 0) {
128 | #if __LP64__
129 |       fprintf (stderr, "error (CDC): illegal argument %d\n", -info);
130 | #else
131 |       fprintf (stderr, "error (CDC): illegal argument %ld\n", -info);
132 | #endif
133 |     }
134 |     else {
135 | #if __LP64__
136 |       fprintf (stderr, "error (CDC): minor not positive definite %d\n", info);
137 | #else
138 |       fprintf (stderr, "error (CDC): minor not positive definite %ld\n",
139 | 	       info);
140 | #endif
141 |     }
142 |     exit (1);
143 |   }
144 | }
145 | 
146 | void
147 | inverse_ (double *pmat, __CLPK_integer * n)
148 | {
149 |   __CLPK_integer lwork = (*n) * (*n);
150 |   __CLPK_integer info;
151 |   __CLPK_integer *ipiv;
152 |   double *work;
153 |   ipiv = (__CLPK_integer *) malloc ((*n) * sizeof (__CLPK_integer));
154 |   if (!ipiv) {
155 |     mem_error ();
156 |   }
157 |   work = (double *) malloc (lwork * sizeof (double));
158 |   if (!work) {
159 |     free (ipiv);
160 |     mem_error ();
161 |   }
162 |   dgetrf_ (n, n, pmat, n, ipiv, &info);
163 |   if (info) {
164 |     free (ipiv);
165 |     free (work);
166 |     inverse_error ("INVERSE", info);
167 |     exit (1);
168 |   }
169 |   dgetri_ (n, pmat, n, ipiv, work, &lwork, &info);
170 |   free (ipiv);
171 |   free (work);
172 |   if (info) {
173 |     inverse_error ("INVERSE", info);
174 |   }
175 | }
176 | 
177 | void
178 | solve_ (double *pmat, double *v, __CLPK_integer * n)
179 | {
180 |   __CLPK_integer ldb = *n;
181 |   char trans = 'N';
182 |   __CLPK_integer nrhs = 1;
183 |   double *work;
184 |   __CLPK_integer *ipiv;
185 |   __CLPK_integer info;
186 |   ipiv = (__CLPK_integer *) malloc (ldb * sizeof (__CLPK_integer));
187 |   if (!ipiv) {
188 |     mem_error ();
189 |   }
190 |   work = (double *) malloc (ldb * ldb * sizeof (double));
191 |   if (!work) {
192 |     free (ipiv);
193 |     mem_error ();
194 |   }
195 |   dgetrf_ (n, n, pmat, n, ipiv, &info);
196 |   if (info) {
197 |     free (ipiv);
198 |     free (work);
199 |     inverse_error ("SOLVE", info);
200 |   }
201 |   dgetrs_ (&trans, n, &nrhs, pmat, n, ipiv, v, &ldb, &info);
202 |   free (ipiv);
203 |   free (work);
204 |   if (info < 0) {
205 |     inverse_error ("SOLVE", info);
206 |   }
207 | }
208 | 
209 | void
210 | geneigsolve_ (double *pmat, double *qmat, double *eval, __CLPK_integer * n)
211 | {
212 |   __CLPK_integer lwork = (*n) * (*n);
213 |   double *work = (double *) malloc (lwork * sizeof (double));
214 |   __CLPK_integer wood_elf = 1;	// Sameer Merchant memorial temporary variable
215 |   __CLPK_integer info;
216 |   if (!work) {
217 |     mem_error ();
218 |   }
219 |   dsygv_ (&wood_elf, "V", "U", n, pmat, n, qmat, n, eval, work, &lwork,
220 | 	  &info);
221 |   free (work);
222 |   if (info && (info <= 2 * (*n))) {
223 |     if (info < 0) {
224 | #if __LP64__
225 |       fprintf (stderr, "error (GENEIGSOLVE): illegal argument %d\n", -info);
226 | #else
227 |       fprintf (stderr, "error (GENEIGSOLVE): illegal argument %ld\n", -info);
228 | #endif
229 |     }
230 |     else if (info <= (*n)) {
231 | #if __LP64__
232 |       fprintf (stderr, "error (GENEIGSOLVE): failure to converge %d\n", info);
233 | #else
234 |       fprintf (stderr, "error (GENEIGSOLVE): failure to converge %ld\n",
235 | 	       info);
236 | #endif
237 |     }
238 |     else {
239 | #if __LP64__
240 |       fprintf (stderr, "error (GENEIGSOLVE): not positive definite %d\n",
241 | 	       info);
242 | #else
243 |       fprintf (stderr, "error (GENEIGSOLVE): not positive definite %ld\n",
244 | 	       info);
245 | #endif
246 |     }
247 |     exit (1);
248 |   }
249 | }
250 | 


--------------------------------------------------------------------------------
/src/nicksrc/sortit.c:
--------------------------------------------------------------------------------
  1 | #include  <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <unistd.h>
  4 | #include <stdlib.h>
  5 | #include "strsubs.h"
  6 | #include "sortit.h"
  7 | #include "vsubs.h"
  8 | 
  9 | /** 
 10 |  a simple set of sort routines
 11 | */
 12 | 
 13 | static double *ttt;
 14 | static long *lttt;
 15 | static int *ittt;
 16 | static int **pttt;
 17 | static int plen = 0;
 18 | static int *porder = NULL;
 19 | 
 20 | void
 21 | setorder (int *pp, int rlen)
 22 | {
 23 |   int *tt;
 24 | 
 25 |   if (plen > 0) {
 26 |     if (porder != NULL)
 27 |       free (porder);
 28 |   }
 29 | 
 30 |   if (pp == NULL) {
 31 |     porder = NULL;
 32 |     plen = rlen;
 33 |     return;
 34 |   }
 35 |   ZALLOC (porder, rlen, int);
 36 |   ZALLOC (tt, rlen, int);
 37 |   copyiarr (pp, tt, rlen);
 38 |   isortit (tt, porder, rlen);
 39 |   free (tt);
 40 |   plen = rlen;
 41 | }
 42 | 
 43 | double
 44 | median (double *aa, int len)
 45 | // should be O(len) algorithm
 46 | {
 47 |   double *b, y;
 48 |   int t, x, a, n;
 49 | 
 50 |   ZALLOC (b, len, double);
 51 |   n = 0;
 52 |   for (a = 0; a < len; ++a) {
 53 |     y = aa[a];
 54 |     if (isfinite (y)) {
 55 |       b[n] = y;
 56 |       ++n;
 57 |     }
 58 |   }
 59 |   if (n == 0)
 60 |     fatalx ("(median) no valids\n");
 61 |   if (n == 1)
 62 |     return b[0];
 63 |   if (n == 2)
 64 |     return 0.5 * (b[0] + b[1]);
 65 |   sortit (b, NULL, n);
 66 |   t = n % 2;
 67 |   x = n / 2;
 68 |   y = b[x];
 69 |   if (t == 0)
 70 |     y = 0.5 * (b[x] + b[x - 1]);
 71 | 
 72 |   free (b);
 73 | //  printf("zzmed: %d %d %d %9.3f\n", len, n, x, y) ;
 74 |   return y;
 75 | 
 76 | 
 77 | }
 78 | 
 79 | void
 80 | sortit (double *a, int *ind, int len)
 81 | {
 82 |   int i, k;
 83 |   int *inda;
 84 | 
 85 |   if (len == 0)
 86 |     fatalx ("(sortit) len = 0\n");
 87 |   ZALLOC (ttt, len, double);
 88 |   ZALLOC (inda, len, int);
 89 | 
 90 |   for (i = 0; i < len; i++) {
 91 |     inda[i] = i;
 92 |   }
 93 | 
 94 |   copyarr (a, ttt, len);
 95 |   qsort ((int *) inda, len, sizeof (int),
 96 |          (int (*)(const void *, const void *)) compit);
 97 | 
 98 |   for (i = 0; i < len; i++) {
 99 |     k = inda[i];
100 |     a[i] = ttt[k];
101 |   }
102 |   free (ttt);
103 |   if (ind != NULL)
104 |     copyiarr (inda, ind, len);
105 |   free (inda);
106 | }
107 | 
108 | int
109 | compit (int *a1, int *a2)
110 | {
111 |   if (ttt[*a1] < ttt[*a2])
112 |     return -1;
113 |   if (ttt[*a1] > ttt[*a2])
114 |     return 1;
115 |   return 0;
116 | }
117 | 
118 | void
119 | lsortit (long *a, int *ind, int len)
120 | {
121 |   int i, k;
122 |   int *inda;
123 | 
124 |   if (len == 0)
125 |     fatalx ("(lsortit) len = 0\n");
126 |   ZALLOC (lttt, len, long);
127 |   ZALLOC (inda, len, int);
128 | 
129 |   for (i = 0; i < len; i++) {
130 |     inda[i] = i;
131 |   }
132 | 
133 |   copylarr (a, lttt, len);
134 |   qsort ((int *) inda, len, sizeof (int),
135 |          (int (*)(const void *, const void *)) lcompit);
136 | 
137 |   for (i = 0; i < len; i++) {
138 |     k = inda[i];
139 |     a[i] = lttt[k];
140 |   }
141 |   free (lttt);
142 |   if (ind != NULL)
143 |     copyiarr (inda, ind, len);
144 |   free (inda);
145 | }
146 | 
147 | void
148 | isortit (int *a, int *ind, int len)
149 | {
150 |   int i, k;
151 |   int *inda;
152 | 
153 |   if (len == 0)
154 |     fatalx ("(isortit) len = 0\n");
155 |   ZALLOC (ittt, len, int);
156 |   ZALLOC (inda, len, int);
157 | 
158 |   for (i = 0; i < len; i++) {
159 |     inda[i] = i;
160 |   }
161 | 
162 |   copyiarr (a, ittt, len);
163 |   qsort ((int *) inda, len, sizeof (int),
164 |          (int (*)(const void *, const void *)) icompit);
165 | 
166 |   for (i = 0; i < len; i++) {
167 |     k = inda[i];
168 |     a[i] = ittt[k];
169 |   }
170 |   free (ittt);
171 |   if (ind != NULL)
172 |     copyiarr (inda, ind, len);
173 |   free (inda);
174 | }
175 | 
176 | int
177 | lcompit (int *a1, int *a2)
178 | {
179 |   if (lttt[*a1] < lttt[*a2])
180 |     return -1;
181 |   if (lttt[*a1] > lttt[*a2])
182 |     return 1;
183 |   return 0;
184 | }
185 | 
186 | int
187 | icompit (int *a1, int *a2)
188 | {
189 |   if (ittt[*a1] < ittt[*a2])
190 |     return -1;
191 |   if (ittt[*a1] > ittt[*a2])
192 |     return 1;
193 |   return 0;
194 | }
195 | 
196 | void
197 | invperm (int *a, int *b, int n)
198 | {
199 | 
200 | /** 
201 |  a, b can be same 
202 | */
203 |   int i, j;
204 |   int *x;
205 | 
206 |   if (n == 0)
207 |     return;
208 |   ZALLOC (x, n, int);
209 | 
210 |   ivclear (x, -1, n);
211 |   for (i = 0; i < n; i++) {
212 |     j = b[i];
213 |     x[j] = i;
214 |   }
215 |   copyiarr (x, a, n);
216 |   free (x);
217 | }
218 | 
219 | void
220 | ipsortit (int **a, int *ind, int len, int rlen)
221 | {
222 |   if (len==0) return ;
223 |   ipsortitp (a, ind, len, rlen, NULL);
224 | 
225 | }
226 | 
227 | void
228 | ipsortitp (int **a, int *ind, int len, int rlen, int *order)
229 | 
230 | /** 
231 |  sort integer array pointers 
232 |  rows of array are sorted in lexicographical order 
233 | 
234 |  compiarr can be called outside the sort
235 | */
236 | {
237 |   int i, k;
238 |   int *inda;
239 | 
240 |   if (len == 0)
241 |     fatalx ("(ipsortit) len = 0\n");
242 |   ZALLOC (pttt, len, int *);
243 |   ZALLOC (inda, len, int);
244 | 
245 |   setorder (order, rlen);       // order defines order as sorted in ascending order.  
246 | 
247 |   for (i = 0; i < len; i++) {
248 |     if (a[i] == NULL)
249 |       fatalx ("(ipsortit) array pointer %d NULL\n", i);
250 |     inda[i] = i;
251 |   }
252 | 
253 |   copyiparr (a, pttt, len);
254 |   qsort ((int *) inda, len,
255 |          sizeof (int), (int (*)(const void *, const void *)) ipcompit);
256 | 
257 |   for (i = 0; i < len; i++) {
258 |     k = inda[i];
259 |     a[i] = pttt[k];
260 | // note that this just reorders pointers.  
261 |   }
262 |   if (ind != NULL)
263 |     copyiarr (inda, ind, len);
264 |   free (inda);
265 |   free (pttt);
266 | }
267 | 
268 | int
269 | ipcompit (int *a1, int *a2)
270 | {
271 |   int l;
272 |   l = compiarr (pttt[*a1], pttt[*a2], plen);
273 |   return l;
274 | }
275 | 
276 | int
277 | compiarr (int *a, int *b, int len)
278 | {
279 |   int i, k;
280 |   for (i = 0; i < len; i++) {
281 |     k = i;
282 |     if (porder != NULL)
283 |       k = porder[i];
284 |     if (a[k] < b[k])
285 |       return -1;
286 |     if (a[k] > b[k])
287 |       return 1;
288 |   }
289 |   return 0;
290 | }
291 | 
292 | int
293 | comparr (double *a, double *b, int len)
294 | {
295 |   int i, k;
296 |   for (i = 0; i < len; i++) {
297 |     k = i;
298 |     if (porder != NULL)
299 |       k = porder[i];
300 |     if (a[k] < b[k])
301 |       return -1;
302 |     if (a[k] > b[k])
303 |       return 1;
304 |   }
305 |   return 0;
306 | }
307 | 
308 | 
309 | void
310 | mkirank (int *rank, int *xin, int n)
311 | // faster to call isortit 
312 | {
313 |   double *a;
314 | 
315 |   ZALLOC (a, n, double);
316 |   floatit (a, xin, n);
317 |   mkrank (rank, a, n);
318 |   free (a);
319 | }
320 | 
321 | void
322 | mkrank (int *rank, double *xin, int n)
323 | 
324 | /**  rank 0:n-1  
325 |  largest element k has rank[k] = 0, smallest, rank[k] = n-1 
326 | */
327 | {
328 |   int i;
329 |   double *a;
330 |   int *ind;
331 | 
332 |   ZALLOC (a, n, double);
333 |   ZALLOC (ind, n, int);
334 | 
335 |   vst (a, xin, -1.0, n);
336 |   sortit (a, ind, n);
337 | 
338 |   for (i = 0; i < n; i++) {
339 |     rank[ind[i]] = i;
340 |   }
341 | 
342 |   free (a);
343 |   free (ind);
344 | 
345 | 
346 | }
347 | 


--------------------------------------------------------------------------------
/src/baseprog.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <unistd.h>
  4 | #include <math.h>
  5 | #include <sys/types.h>
  6 | #include <sys/stat.h>
  7 | #include <fcntl.h>
  8 | 
  9 | #include <nicklib.h>
 10 | #include <getpars.h>
 11 | #include <globals.h>
 12 | 
 13 | #include "admutils.h"
 14 | #include "mcio.h"
 15 | #include "mcmcpars.h"
 16 | #include "egsubs.h"
 17 | #include "exclude.h"
 18 | 
 19 | 
 20 | #define WVERSION   "420"
 21 | 
 22 | // badpairsname added 
 23 | 
 24 | /** 
 25 |  does nothing but read the data 
 26 |  and print snps
 27 |  sometimes a good place to start !!
 28 | 
 29 |  New I/O (mcio.c) added
 30 |  New admutils (snpindex hash)
 31 |  mcio bug fixed (large files)
 32 | */
 33 | 
 34 | 
 35 | #define MAXFL  50
 36 | #define MAXSTR  512
 37 | 
 38 | extern int packmode;
 39 | 
 40 | char *trashdir = "/var/tmp";
 41 | extern int verbose;
 42 | int qtmode = NO;
 43 | Indiv **indivmarkers;
 44 | SNP **snpmarkers;
 45 | int numsnps, numindivs;
 46 | 
 47 | char *genotypename = NULL;
 48 | char *snpname = NULL;
 49 | char *genooutfilename = NULL;
 50 | char *indoutfilename = NULL;
 51 | char *indivname = NULL;
 52 | char *badsnpname = NULL;
 53 | char *goodsnpname = NULL;
 54 | char *badpairsname = NULL;
 55 | char *markername = NULL;
 56 | char *idname = NULL;
 57 | 
 58 | char *outputname = NULL;
 59 | FILE *ofile;
 60 | 
 61 | double fakespacing = 0.0;
 62 | 
 63 | char unknowngender = 'U';
 64 | 
 65 | void readcommands (int argc, char **argv);
 66 | void dophyscheck (SNP ** snpm, int numsnps);
 67 | 
 68 | int
 69 | main (int argc, char **argv)
 70 | {
 71 | 
 72 |   int i, j, k, g;
 73 |   SNP *cupt;
 74 |   Indiv *indx;
 75 |   int ch1, ch2;
 76 | 
 77 |   int numvind, nignore, numrisks = 1;
 78 |   int markernum, idnum;
 79 | 
 80 |   ofile = stdout;
 81 |   packmode = YES;
 82 |   readcommands (argc, argv);
 83 |   if (indivname == NULL) {
 84 |     printf ("no indivname\n");
 85 |     return 0;
 86 |   }
 87 |   if (outputname != NULL)
 88 |     openit (outputname, &ofile, "w");
 89 | 
 90 |   numsnps =
 91 |     getsnps (snpname, &snpmarkers, fakespacing, badsnpname, &nignore,
 92 |              numrisks);
 93 | 
 94 | // fakespacing 0.0 (default)
 95 | 
 96 |   numindivs = getindivs (indivname, &indivmarkers);
 97 |   setstatus (indivmarkers, numindivs, "Case");
 98 | 
 99 |   setgenotypename (&genotypename, indivname);
100 | 
101 |   printf ("genotypename:  %s\n", genotypename);
102 | 
103 |   if (genotypename != NULL) {
104 |     getgenos (genotypename, snpmarkers, indivmarkers,
105 |               numsnps, numindivs, nignore);
106 | 
107 | /**
108 |    if (badpairsname != NULL) {
109 |     loadbadpsc(snpmarkers, numsnps, NO, goodsnpname) ;
110 |     dobadpairs(badpairsname, snpmarkers, numsnps) ;
111 |    }
112 | */
113 |   }
114 |   dophyscheck (snpmarkers, numsnps);
115 | 
116 |   numvind = numvalidind (indivmarkers, numindivs);
117 |   printf ("\n\n");
118 |   printf ("numindivs: %d valid: %d numsnps: %d nignore: %d\n",
119 |           numindivs, numvind, numsnps, nignore);
120 | 
121 |   if (verbose) {
122 |     for (i = 0; i < numindivs; ++i) {
123 |       indx = indivmarkers[i];
124 |       printf ("%20s ", indx->ID);
125 |       for (j = 0; j < numsnps; ++j) {
126 |         cupt = snpmarkers[j];
127 |         if (cupt->ignore)
128 |           continue;
129 |         g = getgtypes (cupt, i);
130 |         if (g < 0)
131 |           g = 9;
132 |         printf ("%1d", g);
133 |       }
134 |       printf ("  %20s", indx->egroup);
135 |       printnl ();
136 |     }
137 |   }
138 | // numsnps includes fakes
139 | 
140 |   if (markername != NULL) {
141 |     markernum = snpindex (snpmarkers, numsnps, markername);
142 |     if (markernum < 0)
143 |       fatalx ("markername %s not found\n", markername);
144 |     cupt = snpmarkers[markernum];
145 |     printf ("markername: %s  %d   %9.3f %12.0f\n",
146 |             cupt->ID, cupt->chrom, cupt->genpos, cupt->physpos);
147 |     for (i = 0; i < numindivs; ++i) {
148 |       indx = indivmarkers[i];
149 |       g = getgtypes (cupt, i);
150 |       printf ("%20s %20s %2d\n", cupt->ID, indx->ID, g);
151 |     }
152 |   }
153 | 
154 |   if (idname != NULL) {
155 |     idnum = indindex (indivmarkers, numindivs, idname);
156 |     if (idnum < 0)
157 |       fatalx ("idname %s not found\n", idname);
158 |     indx = indivmarkers[idnum];
159 |     printf ("idname: %20s  %c   %20s\n",
160 |             indx->ID, indx->gender, indx->egroup);
161 |     for (j = 0; j < numsnps; ++j) {
162 |       cupt = snpmarkers[j];
163 |       if (cupt->ignore)
164 |         continue;
165 |       g = getgtypes (cupt, idnum);
166 |       printf ("%20s %20s %2d", cupt->ID, indx->ID, g);
167 |       printf ("  %3d %12.0f", cupt->chrom, cupt->physpos);
168 |       printf (" %c %c", cupt->alleles[0], cupt->alleles[1]);
169 |       printnl ();
170 |     }
171 |   }
172 | 
173 | /**
174 |   if (genotypename != NULL) {
175 |    printdata(genooutfilename, indoutfilename, snpmarkers, indivmarkers, numsnps, numindivs, NO) ;
176 |   }
177 | */
178 | 
179 |   printf ("##end of run\n");
180 |   return 0;
181 | }
182 | 
183 | void
184 | readcommands (int argc, char **argv)
185 | {
186 |   int i, haploid = 0;
187 |   char *parname = NULL;
188 |   phandle *ph;
189 |   char str[5000];
190 |   char *tempname;
191 |   int n;
192 | 
193 |   while ((i = getopt (argc, argv, "p:vV")) != -1) {
194 | 
195 |     switch (i) {
196 | 
197 |     case 'p':
198 |       parname = strdup (optarg);
199 |       break;
200 | 
201 |     case 'v':
202 |       printf ("version: %s\n", WVERSION);
203 |       break;
204 | 
205 |     case 'V':
206 |       verbose = YES;
207 |       break;
208 | 
209 |     case '?':
210 |       printf ("Usage: bad params.... \n");
211 |       fatalx ("bad params\n");
212 |     }
213 |   }
214 | 
215 | 
216 |   pcheck (parname, 'p');
217 |   printf ("parameter file: %s\n", parname);
218 |   ph = openpars (parname);
219 |   dostrsub (ph);
220 | 
221 | /**
222 | DIR2:  /fg/nfiles/admixdata/ms2
223 | SSSS:  DIR2/outfiles 
224 | genotypename: DIR2/autos_ccshad_fakes
225 | eglistname:    DIR2/eurlist  
226 | output:        eurout
227 | */
228 |   getint (ph, "packmode:", &packmode);  // controls internals 
229 | 
230 |   getstring (ph, "genotypename:", &genotypename);
231 |   getstring (ph, "genooutfilename:", &genooutfilename);
232 |   getstring (ph, "indoutfilename:", &indoutfilename);
233 |   getstring (ph, "snpname:", &snpname);
234 |   getstring (ph, "indivname:", &indivname);
235 |   getstring (ph, "output:", &outputname);
236 |   getstring (ph, "badsnpname:", &badsnpname);
237 |   getstring (ph, "goodsnpname:", &goodsnpname);
238 |   getstring (ph, "badpairsname:", &badpairsname);
239 |   getstring (ph, "markername:", &markername);
240 |   getstring (ph, "idname:", &idname);
241 |   getdbl (ph, "fakespacing:", &fakespacing);
242 |   getint (ph, "familynames:", &familynames);
243 |   writepars (ph);
244 |   closepars (ph);
245 | 
246 | }
247 | 
248 | void
249 | dophyscheck (SNP ** snpm, int numsnps)
250 | {
251 | // catch places where physpos genpos are in opposite order
252 |   SNP *cupt, *cuptold;
253 |   int i;
254 | 
255 |   for (i = 0; i < numsnps; i++) {
256 |     cupt = snpm[i];
257 |     if (i == 0)
258 |       cuptold = cupt;
259 |     if (cupt->isfake)
260 |       continue;
261 |     if (cupt->ignore)
262 |       continue;
263 |     if (cupt->chrom == cuptold->chrom) {
264 |       if (cupt->physpos < cuptold->physpos) {
265 |         printf ("physcheck %20s %15s %12.3f %12.3f %13.0f %13.0f\n",
266 |                 cuptold->ID, cupt->ID,
267 |                 cuptold->genpos, cupt->genpos,
268 |                 cuptold->physpos, cupt->physpos);
269 |       }
270 |     }
271 |     cuptold = cupt;
272 |   }
273 | }
274 | 


--------------------------------------------------------------------------------