├── .gitignore ├── README ├── README.3PopTest ├── README.CONVERTF ├── README.Dstatistics ├── README.F4RatioTest ├── README.INSTALL ├── README.QPGRAPH ├── README.QpWave ├── README.REXPFIT ├── README.ROLLOFF ├── README.ROLLOFF_OUTPUT ├── README.qp4diff ├── README.qpfstats ├── convertf ├── README ├── example.ancestrymapgeno ├── example.bed ├── example.eigenstratgeno ├── example.ind ├── example.ind.packedancestrymap ├── example.map ├── example.packedancestrymap ├── example.packedancestrymapgeno ├── example.ped ├── example.pedind ├── example.pedsnp ├── example.perl ├── example.pheno ├── example.snp ├── example.snp.packedancestrymap ├── example.tgeno ├── ind2pheno.perl ├── par.ANCESTRYMAP.EIGENSTRAT ├── par.ANCESTRYMAP.TGENO ├── par.EIGENSTRAT.PED ├── par.EIGENSTRAT.TGENO ├── par.PACKEDANCESTRYMAP.ANCESTRYMAP ├── par.PACKEDPED.PACKEDANCESTRYMAP ├── par.PED.EIGENSTRAT ├── par.PED.PACKEDPED └── par.TGENO.EIGENSTRAT ├── examples.qpGraph ├── gr1triv ├── gr1x ├── gr1x.oroot ├── gr1x:log:nullout ├── par:sim1:gr1triv ├── par:sim1:gr1x ├── par:sim1:gr1x:2 ├── par:sim1:gr1x:nullout ├── parsimp.templ ├── parsimp2.templ ├── runit ├── runqpgf ├── runqpgf2 └── runqpreroot ├── examples ├── README ├── expfit_Uygur.flog ├── gr1triv:log ├── gr1x:2:log ├── gr1x:log ├── left1 ├── list_qp3Pop ├── list_qp4diff ├── list_qpBound ├── list_qpDpart ├── list_qpDstat ├── list_qpDstat1 ├── list_qpF4ratio ├── mklog ├── parRolloff ├── parqp3Pop ├── parqp4diff ├── parqp4diffa ├── parqpAdm ├── parqpBound ├── parqpDpart ├── parqpDstat ├── parqpDstat1 ├── parqpF4ratio ├── parqpWave ├── parqpff3 ├── plist ├── q1 ├── q2 ├── qp3Pop.log ├── qp4diff.log ├── qpAdm.log ├── qpBound.log ├── qpDstat.log ├── qpDstat1.log ├── qpF4ratio.log ├── qpWave.log ├── right1 ├── rolloff.par ├── rolloff.ref ├── rolloff.target ├── xx:qp4diff ├── xx:qp4diffa └── xx:qpff3 ├── fancy.txt ├── halfscore.pdf ├── pdoc.pdf ├── pdoc.tex ├── perlsrc ├── dof4 ├── dof4a ├── fxtract ├── getresult ├── jackdiff ├── mkpretty ├── numlines ├── 
xtractcol ├── xtractcoltab ├── xtractcolv └── xtractcolx ├── qpGraph_hints.txt ├── qpfs.pdf ├── src ├── .gitignore ├── LICENSE.txt ├── Makefile ├── Makefile.mac ├── add_extern_C.sh ├── admutils.c ├── admutils.h ├── badpairs.h ├── convertf.c ├── diffmean.c ├── dowtjack.c ├── easycheck.c ├── easylite.c ├── easystats.c ├── egsubs.c ├── egsubs.h ├── eigqpsubs.h ├── eigsubs.c ├── eigsubs.h ├── eigx.c ├── empr.c ├── exclude.c ├── exclude.h ├── expfit.templ ├── f4rank.c ├── f4rank.h ├── faidx.h ├── fitsubs.h ├── fourier.c ├── fstats.c ├── fxtract ├── gcount.c ├── geno.c ├── geno.h ├── geno_single.c ├── globals.h ├── grabpars.c ├── gslkim.c ├── gslqp.c ├── gslrank1.c ├── h2d.c ├── h2d.h ├── kimf.c ├── kimqpg.c ├── kimqpr.c ├── ldsubs.c ├── ldsubs.h ├── mapfile.c ├── mapfile.h ├── mcio.c ├── mcio.h ├── mcmcpars.h ├── merge_transpose.c ├── mergeit.c ├── mkweights.c ├── multimerge.c ├── nagqp.c ├── nagsubs.c ├── nagsubs.h ├── nickhash.c ├── nicklib.h ├── nicksam.h ├── nicksrc │ ├── .gitignore │ ├── LICENSE.txt │ ├── Makefile │ ├── esttime2.c │ ├── gauss.c │ ├── gcd.c │ ├── gds.c │ ├── getpars.c │ ├── getpars.h │ ├── kimf.c │ ├── linmake │ ├── linsolve.c │ ├── linsubs.c │ ├── linsubs.h │ ├── mptable.h │ ├── nicklib.h │ ├── ranmath.h │ ├── sortit.c │ ├── sortit.h │ ├── statsubs.c │ ├── statsubs.h │ ├── strsubs.c │ ├── strsubs.h │ ├── twtable.h │ ├── vsubs.c │ ├── vsubs.h │ ├── xsearch.c │ └── xsearch.h ├── packit.h ├── qfstats1 ├── qp3Pop.c ├── qp4diff.c ├── qpAdm.c ├── qpBound.c ├── qpDpart.c ├── qpDstat.c ├── qpF4ratio.c ├── qpGraph.c ├── qpWave.c ├── qpdslow.c ├── qpff3base.c ├── qpfit.c ├── qpfitr1.c ├── qpfmv.c ├── qpfmvmix.c ├── qpfstats.c ├── qpgsubs.c ├── qpgsubs.h ├── qpmix.c ├── qpreroot.c ├── qpsubs.c ├── qpsubs.h ├── qpsubsarti.c ├── qsubs.h ├── regsubs.c ├── regsubs.h ├── rexpfit.r ├── rolloff.c ├── rolloffp.c ├── script │ ├── .gitignore │ ├── convertf.py │ ├── extract_individuals.py │ ├── geno_header.py │ ├── grepcol │ ├── grepcolf │ ├── hash_checks.py │ ├── 
link_identical_files.py │ ├── merge_transpose.py │ ├── mktmp │ ├── nick_hash_file.py │ ├── read_group_categories.py │ ├── test_hash_merge.py │ └── xtractcol ├── simpjack2.c ├── snpunion.c ├── test │ ├── H.geno │ ├── H.ind │ ├── J.geno │ ├── J.ind │ ├── K.geno │ ├── K.ind │ ├── ignore │ │ ├── expected.geno │ │ ├── expected.ind │ │ ├── start.geno │ │ ├── start.ind │ │ └── start.snp │ ├── test1.geno │ ├── test1.ind │ ├── test1.snp │ ├── test12.geno │ ├── test12.ind │ ├── test123.geno │ ├── test123.ind │ ├── test123.snp │ ├── test13.geno │ ├── test13.ind │ ├── test2.geno │ ├── test2.ind │ ├── test2.snp │ ├── test3.geno │ ├── test3.ind │ └── test3.snp ├── testr1.c ├── tmcio.h ├── transpose.c ├── twtable ├── wtjack.pl ├── xpsubs.h └── xqpsubs.c └── transpose_info ├── fxtdoc ├── README ├── fxtract.info ├── par2 ├── par_fxtract └── tlist └── transpose_format.txt /.gitignore: -------------------------------------------------------------------------------- 1 | #Temp files 2 | *~ 3 | *.o 4 | *.bak 5 | 6 | #Data - download example data from Reich lab website 7 | data/* 8 | 9 | #Output directories 10 | bin/* 11 | src/admixinclude/* 12 | src/admixlib/* 13 | src/admixtables/* 14 | 15 | # Other directories 16 | src/xsrc/* 17 | src/oldsource/* 18 | 19 | #Various output files 20 | pdoc.aux 21 | pdoc.dvi 22 | pdoc.log 23 | pdoc.ps 24 | 25 | #Examples 26 | examples.qpGraph/*:log 27 | examples.qpGraph/*.ggg 28 | examples.qpGraph/*.dot 29 | examples.qpGraph/*.ps 30 | -------------------------------------------------------------------------------- /README.3PopTest: -------------------------------------------------------------------------------- 1 | DOCUMENTATION OF 3-Population Test (qp3Pop): 2 | 3 | The 3-population test is a formal test of admixture and can provide clear evidence of 4 | admixture, even if the gene flow events occurred hundreds of generations ago. If we want to test if C has ancestry from populations related to A and B then we can perform the test f3(C; A, B). 
5 | If C is unadmixed, then f3 (C ; A, B) has non-negative mean. If f3 (C ; A, B) has negative mean, in contrast, this implies that C is admixed with 6 | populations close to A and B (check the significance of the f3 mean and Z-score). 7 | 8 | qp3Pop requires that the input data is available in EIGENSTRAT format. To convert to the appropriate format, one can use the CONVERTF program. See README.CONVERTF for documentation of programs for converting file formats. 9 | 10 | Executable and source code: 11 | ------------------------------------------------------------------------------ 12 | 13 | For information about installing the program, see README.INSTALL. After installing the programs, the executable for 3 pop test (qp3Pop) should be located in the bin directory. 14 | 15 | To run qp3Pop, type the following on a linux machine. 16 | $DIR/bin/qp3Pop -p parfile >logfile 17 | 18 | $DIR: Path to the bin directory. 19 | logfile: Name of the logfile. The logfile contains the output of the run. 20 | parfile: Name of parameter file 21 | 22 | DESCRIPTION OF EACH PARAMETER in parfile: 23 | 24 | genotypename: input genotype file (in eigenstrat format) 25 | snpname: input snp file (in eigenstrat format) 26 | indivname: input indiv file (in eigenstrat format) 27 | popfilename: list_qp3test (contains 3 populations on each line: Source1 (A) Source2 (B) Target (C)) 28 | 29 | ## optional; but important parameter 30 | inbreed: YES 31 | ## Use if target pop is inbred OR (and crucially) if target is pseudo-diploid 32 | 33 | *** NEW *** 34 | By default f_3 output is normalized by estimated heterozygosity 35 | of the target. This doesn't work if the target has no hets as can 36 | happen in "outgroup f_3" mode. In this case set: 37 | outgroupmode: YES 38 | in which case the f_3 denominator is set to an arbitrary .001 39 | 40 | 41 | 42 | 43 | DESCRIPTION OF OUTPUT FILE: 44 | The program will write all the output to stdout. 
The output file prints the parfile entered by the user, the number of populations included in the popfilename (nplist), the number of blocks for block jackknife, the number of snps included in the run and the results. 45 | 46 | The results have the following format - 47 | result: Source1 Source2 Target f_3 std.err Z SNPs 48 | 49 | The result for each set of 3 population is shown on a separate line. 50 | 51 | See example shown in- 52 | examples/qp3Pop.log 53 | 54 | 55 | ------------------------------------------------------------------------------- 56 | Follow on: qpBound 57 | Computes lower and upper bounds for mixing coeff (source 1) as in "Ancient admixture in human history" 58 | parfile as above EXCEPT 59 | popfilename should contain lines with triple of populations f_3 and Z. 60 | and 61 | outpop: outgroup_pop which MUST be present. 62 | 63 | See examples shown in- 64 | examples/qpBound.log 65 | 66 | 67 | Nick Patterson 68 | 69 | ------------------------------------------------------------------------------ 70 | -------------------------------------------------------------------------------- /README.CONVERTF: -------------------------------------------------------------------------------- 1 | convertf converts between the 5 different file formats we support. 2 | See ./convertf/README for details (README) and examples 3 | 4 | mergeit merges two data sets into a third, which has the union of 5 | the individuals and the intersection of the SNPs in the first two. 6 | 7 | Also see ./convertf 8 | 9 | -------------------------------------------------------------------------------- /README.Dstatistics: -------------------------------------------------------------------------------- 1 | DOCUMENTATION OF D-statistics (qpDstat): 2 | 3 | The 4-population test, implemented here as D-statistics, is also a formal test for admixture based on a four taxon 4 statistic, which can provide some information about the direction of gene flow. 
4 | For any 4 populations (W, X, Y, Z), qpDstat computes the D-statistics as - 5 | num = (w − x)(y − z) 6 | den = (w + x − 2wx)(y + z − 2yz) 7 | 8 | D = num / den 9 | 10 | The output of qpDstat is informative about the direction of gene flow. So for 4 populations (W, X, Y, Z) as follows - 11 | If the Z-score is +ve, then the gene flow occurred either between W and Y or X and Z 12 | If the Z-score is -ve, then the gene flow occurred either between W and Z or X and Y. 13 | 14 | qpDstat requires that the input data is available in a Reichlab format such as EIGENSTRAT. 15 | To convert to the appropriate format, one can use CONVERTF. See 16 | README.CONVERTF for documentation of programs for converting file formats. 17 | 18 | Executable and source code: 19 | ------------------------------------------------------------------------------ 20 | The executables of AdmixTools are available here: 21 | https://github.com/DReichLab/AdmixTools/releases 22 | 23 | Please be aware that AdmixTools is designed for Linux systems. If the executables don't work on your Linux machine, please compile and install the program. For information about installing the program, see README.INSTALL. After installing the programs, the executable for D-statistics (qpDstat) should be located in the bin directory. 24 | 25 | To run qpDstat, type the following on a linux machine. 26 | $DIR/bin/qpDstat -p parfile [-L lo] [-H hi] >logfile 27 | 28 | $DIR: Path to the bin directory. 29 | logfile: Name of the logfile. The logfile contains the output of the run. 30 | parfile: Name of parameter file 31 | 32 | NEW FEATURE 33 | In some cases using popfilename (see below) the program uses an excessive amount of memory. 34 | A solution, designed to be convenient, is to make multiple runs with -L lo -H hi set, in which case only lines 35 | lo through hi (starting at 1) of popfilename will be used. This makes it easy to do multiple runs 36 | and then extract all results by grep result: ... 37 | *** note new parameter flags. 
Used to be -l, -h 38 | 39 | DESCRIPTION OF EACH PARAMETER in parfile: 40 | 41 | genotypename: input genotype file (in eigenstrat format) 42 | snpname: input snp file (in eigenstrat format) 43 | indivname: input indiv file (in eigenstrat format) 44 | poplistname: list_qpDstat (contains list of populations - one population on each line). Program will run the method for all quadruples. 45 | popfilename: list_qpDstat1 (Enter a list of tests you want to perform - 4 populations on each line). Program will run the method for each quadruple (included on each line). 46 | ## optional parameter 47 | f4mode: YES 48 | ## default NO 49 | ## f4 statistics not D-stats are computed 50 | 51 | DESCRIPTION OF OUTPUT FILE: 52 | The program will write all the output to stdout. The output file prints the parfile entered by the user, number of snps and individuals, jackknife block size, number of blocks for jackknife and the results. 53 | 54 | The results have the following format - 55 | result: Pop1 (W) Pop2 (X) : Pop3 (Y) Pop4 (Z) D-stat Z BABA ABBA #_of_SNPs_that_all_populations_have_data 56 | 57 | The result for each quadruple is shown on a separate line. 58 | 59 | See example shown in- 60 | examples/qpDstat.log, examples/qpDstat1.log 61 | 62 | NOTE: 63 | Z = D/standard_error 64 | If you would like qpDstat to output standard error, please add the following line to your parameter file: 65 | printsd: YES 66 | 67 | ========================================================================== 68 | New program: qpDpart; implements partitioned D-stats 69 | (Eaton and Ree. Inferring phylogeny and introgression .. Systematic Biology (2013) (Vol 62:5) pp 689-706) 70 | parameter file as qpDstat with the following exceptions. 71 | pattern: pat1 : pat2 72 | ## pat1, pat2 are AB patterns of same length (L) 73 | popfilename: myfile 74 | ##myfile will consist of L populations / line. 
75 | fmode: YES 76 | ## prints abba(pat1) - abba(pat2) NOT D-stat ; Note fmode NOT f4mode 77 | fscale: 10000000 78 | ## if fmode: YES f-stats are multiplied by fscale on printing. Default is 1000000 79 | ## This is because if L is large f may be quite small. 80 | ## pattern: BABA : ABBA will produce results very similar to qpDstat 81 | 82 | 83 | 84 | Nick Patterson 85 | 86 | ------------------------------------------------------------------------------ 87 | 88 | -------------------------------------------------------------------------------- /README.F4RatioTest: -------------------------------------------------------------------------------- 1 | DOCUMENTATION OF F4 ratio estimation (qpF4ratio): 2 | 3 | F4 ratio estimation allows inference of the mixing proportions of an admixture event, even without access to accurate surrogates for the ancestral populations. 4 | 5 | For any 5 populations that are related to each other as shown in Figure 4 (Ancient Admixture, Patterson et al. 2012), we can compute the admixture proportion \alpha as - 6 | 7 | \alpha = f4(A,O; X,C) / f4(A,O; B,C) 8 | 9 | qpF4ratio requires that the input data is available in EIGENSTRAT format. To convert to the appropriate format, one can use the CONVERTF program. See README.CONVERTF for documentation of programs for converting file formats. 10 | 11 | Executable and source code: 12 | ------------------------------------------------------------------------------ 13 | 14 | For information about installing the program, see README.INSTALL. After installing the programs, the executable for F4 ratio estimation (qpF4ratio) should be located in the bin directory. 15 | 16 | To run qpF4ratio, type the following on a linux machine. 17 | $DIR/bin/qpF4ratio -p parfile >logfile 18 | 19 | $DIR: Path to the bin directory. 20 | logfile: Name of the logfile. The logfile contains the output of the run. 
21 | parfile: Name of parameter file 22 | 23 | DESCRIPTION OF EACH PARAMETER in parfile: 24 | 25 | genotypename: input genotype file (in eigenstrat format) 26 | snpname: input snp file (in eigenstrat format) 27 | indivname: input indiv file (in eigenstrat format) 28 | popfilename: list (contains list of 5 populations - A O : X C :: A O : B C. Colon separators are not required and are ignored by the program. They have been included to make the input/output more readable). 29 | blgsize: jackknife block size (in Morgans; e.g. .05 = 5 cM) 30 | 31 | Note: 32 | If you would like to use SNP number to define block size, you can: 33 | make a fake .snp file with (false) physical positions, and 0 genetic distances. 34 | The software defaults to 1M bases = 1 cM, so if (for example) you set the 35 | distance between each snp as 10000 bases and blgsize: .01 then each block 36 | will be 100 snps. 37 | 38 | DESCRIPTION OF OUTPUT FILE: 39 | The program will write all the output to stdout. The output file prints the parfile entered by the user, number of snps and individuals, jackknife block size, number of blocks for jackknife 40 | and the results. 41 | 42 | The results have the following format - 43 | result: Pop1 (A) Pop2 (O) Pop3 (X) Pop4 (C): Pop1 (A) Pop2 (O) Pop5 (B) Pop4 (C) alpha std. err Z (null=0) 44 | 45 | The result for each set of 5 populations is shown on a separate line. 46 | 47 | 48 | See example shown in- 49 | examples/qpF4ratio.log 50 | 51 | Nick Patterson 52 | 53 | ------------------------------------------------------------------------------ 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /README.INSTALL: -------------------------------------------------------------------------------- 1 | ADMIXTOOLS version 5.0 6/28/18 (for Linux and Mac) 2 | 3 | Best is to recompile on your system. 4 | 5 | You will need to be able to link the GNU Scientific Library (gsl), BLAS, gfortran, and probably lapack. 
6 | This Makefile expects openblas but other versions of BLAS should be OK. Edit the Makefile 7 | appropriately. 8 | 9 | cd src; make clobber; make all; make install. Executables will be in ../bin 10 | 11 | A Makefile that worked for a Mac (El Capitan) is in src/Makefile.mac 12 | 13 | Nick Patterson 14 | -------------------------------------------------------------------------------- /README.QPGRAPH: -------------------------------------------------------------------------------- 1 | qpGraph -p parfile -g graph [-o outgraph] [-d dotfile] 2 | 3 | See examples.qpGraph/runit for some simple examples and sample output 4 | 5 | Sample parameter file 6 | DIR: ../data 7 | S1: sim1 8 | indivname: DIR/S1.ind 9 | snpname: DIR/S1.snp 10 | genotypename: DIR/S1.geno 11 | outpop: Out 12 | blgsize: 0.05 13 | ## block size in Morgans for Jackknife 14 | ## see below 15 | lsqmode: YES 16 | diag: .0001 17 | hires: YES 18 | initmix: 1000 19 | precision: .0001 20 | zthresh: 3.0 21 | terse: NO 22 | useallsnps: NO 23 | 24 | 25 | -p parfile specifies input data (any Reich lab format) 26 | 27 | outpop: zzz 28 | f-stats normalized by heterozygosity in population zzz 29 | outpop: NULL has special meaning : no normalization 30 | blgsize: block size for Jackknife (Morgans) 31 | lsqmode: Default is NO; Estimate of error covariance is used. Unstable for large problems 32 | and instead we fit f_3 stats with "base" population first label (see graph spec) 33 | diag: small fudge factor to add on diagonal of f3. Improves stability 34 | Given admixing weights the edge lengths (drifts) are set by minimizing 35 | a degree 2 function, subject to non-negativity of the lengths. 36 | Initially the program tries V initial tries of admixing weights 37 | default value for V is 10 * 2^n where there are n admixtures in the graph. 38 | To override this default 39 | 40 | initmix: V 41 | where V is the desired number of initial tries. 
This may be useful 42 | if you are worried the program may not have found the global optimum. 43 | 44 | hires: controls output; sometimes more decimals are desired 45 | precision: controls accuracy of recovered admixture weights. Coefficients will be within "precision" of MLE estimate. 46 | zthresh: controls outlier output. All f-stats are calculated and those diverging more than "zthresh" from fitted values are 47 | printed. useallsnps: use all SNPs even if some populations have no data for a SNP (default NO) 48 | 49 | *** new feature *** 50 | admixin: 51 | contains lines such as 52 | admix T S1 S2 w1 w2 53 | where T is a target node, S1, S2 are source nodes and w1 w2 are weights. qpGraph 54 | will rescale these to sum to 1. 55 | These lines are in the same format as the output graph [-o option below] and such a graph can be used 56 | (only admix lines are recognized) 57 | admixout: 58 | This file is of course easy to hand edit. 59 | If admixin is present then initial try estimation is omitted which will make qpGraph run 60 | very much faster. 61 | inbreed: YES 62 | Genotypes are expected to be pseudo-haploid -- 2 samples at least per population or drift lengths on 63 | leaves are not meaningful. 64 | 65 | By default a short form of the pop. is given in the list of outliers. If you want the full name 66 | fulloutlier: YES 67 | 68 | 69 | -g graph 70 | See examples.qpGraph/gr1x 71 | Note comments can be included in file. Begin a line with # 72 | 73 | [-o outgraph] This can be hand edited and input again. Note format is different. 74 | qpGraph will accept either style. 75 | 76 | [-d dotfile] Graph output in "dot format"; dot -Tps < dotfile > dotfile.ps gives postscript 77 | "dot" must be installed (part of Graphviz) 78 | 79 | *** new feature *** 80 | Input can now be an fstats file -- see README.qpfstats 81 | *** new feature *** 82 | halfscore (won't work in combination with fstats) 83 | halfscore: YES 84 | halfjackname: 85 | Does a VERY conservative goodness of fit test. 
See halfscore.pdf for details. 86 | 87 | 88 | 89 | =================================================================================================== 90 | Utility program: qpreroot 91 | If the input graph is not connected or has loops qpGraph will die unpleasantly. 92 | Best way to see what is happening: 93 | 94 | qpreroot -g graph -d dotfile 95 | ## This also makes postscript 96 | 97 | We also can change the root node 98 | qpreroot -g graph -o outgraph -r node 99 | 100 | 101 | -------------------------------------------------------------------------------- /README.REXPFIT: -------------------------------------------------------------------------------- 1 | Documentation for REXPFIT: 2 | 3 | rexpfit.r is a program that can be used to fit an exponential distribution to the output of ROLLOFF. This program uses the nls function in R to determine the nonlinear (weighted) least-squares estimates of the parameters of a nonlinear model. It also uses the DEoptim package to pick the initial values of all the parameters (Katharine Mullen, David Ardia, David Gil, Donald Windover, James Cline (2011). DEoptim: An R Package for Global Optimization by Differential Evolution. Journal of Statistical Software, 40(6), 1-26. URL http://www.jstatsoft.org/v40/i06/). 4 | 5 | More information on these functions and packages can be found at- 6 | http://stat.ethz.ch/R-manual/R-patched/library/stats/html/nls.html 7 | http://cran.r-project.org/web/packages/DEoptim/index.html 8 | 9 | To run the program, you need R. R is a free software and can be downloaded from- 10 | http://www.r-project.org/ 11 | 12 | ################ 13 | Load the program in R (src/rexpfit.r): 14 | 15 | source("rexpfit.r") 16 | 17 | To run the program- 18 | expfit(input, output, output_col, lval, hval, affine) 19 | 20 | where- 21 | input: ROLLOFF Output file name. As input the program takes the output of ROLLOFF. 22 | output: Filename for the output of expfit. 
23 | output_col: column # of the output file which has correlation values. Default is 4. 24 | lval: Lower (starting) value of genetic distance. Default is 0.5cM. 25 | hval: Higher (end) value of genetic distance. Default is 100cM 26 | affine: TRUE/ FALSE. Default is TRUE. 27 | plot: TRUE/ FALSE. plot the output 28 | jackknife: TRUE/ FALSE - jackknife option used in ROLLOFF or not. 29 | 30 | example: 31 | expfit(example.out, example, 4, 0.5, 100, TRUE, TRUE, FALSE) 32 | 33 | Output files: 34 | The program creates 3 output files: 35 | output.log: contains the summary of the fit 36 | fit_output: contains the fitted values generated by the model 37 | output.pdf: pdf file which shows the ROLLOFF output and the fitted values. 38 | 39 | where, output = output file name used in the expfit function call. 40 | 41 | Common Errors: 42 | If you get an error for library DEoptim, please install the library using - 43 | 44 | install.packages("DEoptim") 45 | library("DEoptim") 46 | 47 | Load the program again - 48 | source("rexpfit.r") 49 | 50 | Nick Patterson 51 | 52 | 53 | ------------------------------------------------------------------------------ 54 | 55 | -------------------------------------------------------------------------------- /README.qp4diff: -------------------------------------------------------------------------------- 1 | DOCUMENTATION OF qp4diff: 2 | 3 | It is often useful to directly test the difference of 2 f4 statistics. 4 | If we merely run qpDstat and difference 2 results, we cannot correctly compute standard errors. 5 | 6 | qp4diff requires that the input data is available in one of the 5 supported formats. 7 | 8 | 9 | Executable and source code: 10 | ------------------------------------------------------------------------------ 11 | 12 | For information about installing the program, see README.INSTALL. After installing the programs, the executable (qp4diff) should be located in the bin directory. 
13 | 14 | To run qp4diff, type the following on a linux machine. 15 | $DIR/bin/qp4diff -p parfile >logfile 16 | 17 | $DIR: Path to the bin directory. 18 | logfile: Name of the logfile. The logfile contains the output of the run. 19 | parfile: Name of parameter file 20 | 21 | DESCRIPTION OF EACH PARAMETER in parfile: 22 | 23 | genotypename: input genotype file (in eigenstrat format) 24 | snpname: input snp file (in eigenstrat format) 25 | indivname: input indiv file (in eigenstrat format) 26 | popfilename: list (contains list of 8 populations - A B : C D :: E F : G H 27 | Colon separators are not required and are ignored by the program. They have been included to make the input/output more readable). 28 | Populations can repeat between the left and right quadruplets 29 | blgsize: jackknife block size (in Morgans; e.g. .05 = 5 cM) 30 | 31 | *** 2 optional parameters *** 32 | allsnps: YES 33 | ## default NO. By default only SNPs with valid f4 for both quads are used. 34 | firstf4mult: val 35 | The first f4 is multiplied by val. Can be used to test if alpha a_1 = a_2 for a fixed alpha. 36 | 37 | Note: 38 | If you would like to use SNP number to define block size, you can: 39 | make a fake .snp file with (false) physical positions, and 0 genetic distances. 40 | The software defaults to 1M bases = 1 cM, so if (for example) you set the 41 | distance between each snp as 10000 bases and blgsize: .01 then each block 42 | will be 100 snps. 43 | 44 | DESCRIPTION OF OUTPUT FILE: 45 | The program will write all the output to stdout. The output file prints the parfile entered by the user, number of snps and individuals, jackknife block size, number of blocks for jackknife 46 | and the results. 47 | 48 | The results have the following format - 49 | result: A B C D : E F G H f4diff std. 
error Z 50 | 51 | Nick Patterson 52 | 53 | ------------------------------------------------------------------------------ 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /README.qpfstats: -------------------------------------------------------------------------------- 1 | qpfstats 2 | -------- 3 | DIR: /home/np29/biologyx/v41x/yamdir2/testdir 4 | S1: qdata1 5 | S1X: qdata1 6 | indivname: DIR/S1X.ind 7 | snpname: DIR/S1.snp 8 | genotypename: DIR/S1.geno 9 | poplistname: lista 10 | ## must be present. One population / line. First population is base 11 | fstatsoutname: fstatsa.txt 12 | ## first line is header. Must be retained 13 | allsnps: YES 14 | ## default NO -- dangerous bend 15 | inbreed: NO 16 | ## default (match allsnps:) 17 | scale: NO 18 | ## default YES -- when fstats are scaled to "match" fst in least squares sense 19 | *** strongly advise to specify allsnps: and inbreed: in the parameter file. 20 | 21 | If inbreed: NO the base population (first in poplistname) MUST be a true diploid. 22 | inbreed: NO when there are pseudo-diploid populations is dubious but may be necessary, 23 | especially if there is a population with a single sample. In this case some f2 statistics 24 | can't be evaluated and are set in the output to have huge variance (100). This means 25 | that in qpGraph some tail edges are set 0 -- really are undetermined. 26 | 27 | qpfmv 28 | ----- 29 | fstatsname: fstatsa.txt 30 | popfilename: f4sslist 31 | ## 4 pops / line (as in qpDstat) but f2, f3, f4 can be mixed. So code A C B C for f3 stat 32 | fmvoutname: fmvq2.out 33 | printsd: YES ## if you want s.err. default NO. 
34 | 35 | 36 | New parameters for qpAdm (which should be upwards compatible) 37 | fstatsname: 38 | ## if present no need to specify .ind .snp .geno 39 | numboot: <# of bootstrap samples + antithetical samples> 40 | ## default 1000 41 | 42 | New parameters for qpWave (which should be upwards compatible) 43 | fstatsname: 44 | 45 | New parameters for qpGraph (which should be upwards compatible) 46 | qpGraph allows you to make a qpfstats file 47 | Example 48 | 1) Make qpfstats :: ~np29/newadm19/src/jtest1/ww2dir/testdir/qpfs_example/parw2 49 | D2: /home/np29/broaddatax/v41 50 | D1: D2 51 | S1: jd1 52 | indivname: D1/S1.ind 53 | snpname: D1/S1.snp 54 | genotypename: D1/S1.geno 55 | fstatsoutname: fstatsw2 56 | allsnps: YES 57 | ## there is an option oldallsnps: YES for compatibility. Not recommended. 58 | loadf3: YES 59 | ## this is needed to get qpfstats calculated 60 | graphname: graph1 61 | diag: .0001 62 | 63 | 2) Use qpfstats 64 | fstatsname: fstatsw2 65 | graphname: graph1 66 | diag: .0001 67 | 68 | *** qpfstats with allsnps: YES is a much better way to use all the data than 69 | the older allsnps mode. Standard errors are often much reduced and the theory 70 | is now defensible! 71 | 72 | *** there was a bad bug in release 7.0 when qpfstats was run with 73 | 1) allsnps: YES 74 | 2) inbreed: NO 75 | 3) Samples with pseudo-diploid data. 76 | 77 | In the new release if you do this, f-statistics involving pseudo-diploid populations twice (such as an f_3 with a pseudo-diploid target) are 78 | (correctly) flagged as having very large standard error. 79 | 80 | 81 | 82 | See ./qpfs.pdf for a brief explanation of the theory. 
83 | -------------------------------------------------------------------------------- /convertf/example.ancestrymapgeno: -------------------------------------------------------------------------------- 1 | rs0000 SAMPLE0 1 2 | rs0000 SAMPLE1 1 3 | rs0000 SAMPLE2 1 4 | rs0000 SAMPLE3 0 5 | rs0000 SAMPLE4 0 6 | rs1111 SAMPLE0 0 7 | rs1111 SAMPLE1 1 8 | rs1111 SAMPLE2 2 9 | rs1111 SAMPLE3 1 10 | rs1111 SAMPLE4 2 11 | rs2222 SAMPLE0 2 12 | rs2222 SAMPLE1 1 13 | rs2222 SAMPLE2 1 14 | rs2222 SAMPLE3 0 15 | rs2222 SAMPLE4 1 16 | rs3333 SAMPLE0 0 17 | rs3333 SAMPLE1 0 18 | rs3333 SAMPLE2 1 19 | rs3333 SAMPLE3 2 20 | rs3333 SAMPLE4 2 21 | rs4444 SAMPLE0 2 22 | rs4444 SAMPLE1 1 23 | rs4444 SAMPLE2 1 24 | rs4444 SAMPLE3 0 25 | rs4444 SAMPLE4 0 26 | rs5555 SAMPLE0 0 27 | rs5555 SAMPLE1 0 28 | rs5555 SAMPLE2 1 29 | rs5555 SAMPLE3 1 30 | rs5555 SAMPLE4 1 31 | rs6666 SAMPLE0 2 32 | rs6666 SAMPLE1 2 33 | rs6666 SAMPLE2 1 34 | rs6666 SAMPLE3 1 35 | rs6666 SAMPLE4 0 36 | -------------------------------------------------------------------------------- /convertf/example.bed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/AdmixTools/b10ddcfdf43792125052c8bc8b8a96cf5335eb56/convertf/example.bed -------------------------------------------------------------------------------- /convertf/example.eigenstratgeno: -------------------------------------------------------------------------------- 1 | 11100 2 | 01212 3 | 21101 4 | 00122 5 | 21100 6 | 00111 7 | 22110 8 | -------------------------------------------------------------------------------- /convertf/example.ind: -------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /convertf/example.ind.packedancestrymap: 
-------------------------------------------------------------------------------- 1 | SAMPLE0 F Case 2 | SAMPLE1 M Case 3 | SAMPLE2 F Control 4 | SAMPLE3 M Control 5 | SAMPLE4 F Control 6 | -------------------------------------------------------------------------------- /convertf/example.map: -------------------------------------------------------------------------------- 1 | 11 rs0000 0.000000 0 2 | 11 rs1111 0.001000 100000 3 | 11 rs2222 0.002000 200000 4 | 11 rs3333 0.003000 300000 5 | 11 rs4444 0.004000 400000 6 | 11 rs5555 0.005000 500000 7 | 11 rs6666 0.006000 600000 8 | -------------------------------------------------------------------------------- /convertf/example.packedancestrymap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/AdmixTools/b10ddcfdf43792125052c8bc8b8a96cf5335eb56/convertf/example.packedancestrymap -------------------------------------------------------------------------------- /convertf/example.packedancestrymapgeno: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/AdmixTools/b10ddcfdf43792125052c8bc8b8a96cf5335eb56/convertf/example.packedancestrymapgeno -------------------------------------------------------------------------------- /convertf/example.ped: -------------------------------------------------------------------------------- 1 | 1 SAMPLE0 0 0 2 2 1 2 3 3 1 1 1 1 3 3 1 1 3 3 2 | 2 SAMPLE1 0 0 1 2 1 2 1 3 1 4 1 1 1 3 1 1 3 3 3 | 3 SAMPLE2 0 0 2 1 1 2 1 1 1 4 1 2 1 3 1 4 3 4 4 | 4 SAMPLE3 0 0 1 1 2 2 1 3 4 4 2 2 1 1 1 4 3 4 5 | 5 SAMPLE4 0 0 2 1 2 2 1 1 1 4 2 2 1 1 1 4 4 4 6 | -------------------------------------------------------------------------------- /convertf/example.pedind: -------------------------------------------------------------------------------- 1 | 1 SAMPLE0 0 0 2 2 2 | 2 SAMPLE1 0 0 1 2 3 | 3 SAMPLE2 0 0 2 1 4 | 4 SAMPLE3 0 0 1 1 5 | 5 SAMPLE4 0 0 2 1 6 | 
-------------------------------------------------------------------------------- /convertf/example.pedsnp: -------------------------------------------------------------------------------- 1 | 11 rs0000 0.000000 0 A C 2 | 11 rs1111 0.001000 100000 A G 3 | 11 rs2222 0.002000 200000 A T 4 | 11 rs3333 0.003000 300000 C A 5 | 11 rs4444 0.004000 400000 G A 6 | 11 rs5555 0.005000 500000 T A 7 | 11 rs6666 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /convertf/example.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # $parfile = "par.ANCESTRYMAP.EIGENSTRAT"; 4 | $parfile = "par.EIGENSTRAT.PED"; 5 | # $parfile = "par.PED.EIGENSTRAT"; 6 | # $parfile = "par.PED.PACKEDPED"; 7 | # $parfile = "par.PACKEDPED.PACKEDANCESTRYMAP"; 8 | # $parfile = "par.PACKEDANCESTRYMAP.ANCESTRYMAP"; 9 | 10 | system("../bin/convertf -p $parfile"); 11 | -------------------------------------------------------------------------------- /convertf/example.pheno: -------------------------------------------------------------------------------- 1 | 11000 2 | -------------------------------------------------------------------------------- /convertf/example.snp: -------------------------------------------------------------------------------- 1 | rs0000 11 0 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 600000 G T 8 | -------------------------------------------------------------------------------- /convertf/example.snp.packedancestrymap: -------------------------------------------------------------------------------- 1 | rs0000 11 0.000000 0 A C 2 | rs1111 11 0.001000 100000 A G 3 | rs2222 11 0.002000 200000 A T 4 | rs3333 11 0.003000 300000 C A 5 | rs4444 11 0.004000 400000 G A 6 | rs5555 11 0.005000 500000 T A 7 | rs6666 11 0.006000 
600000 G T 8 | -------------------------------------------------------------------------------- /convertf/example.tgeno: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/AdmixTools/b10ddcfdf43792125052c8bc8b8a96cf5335eb56/convertf/example.tgeno -------------------------------------------------------------------------------- /convertf/ind2pheno.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | $in = $ARGV[0]; # .ind file 4 | $out = $ARGV[1]; # .pheno file 5 | 6 | open(IN,$in) || die("COF"); 7 | open(OUT,">$out") || die("COF"); 8 | 9 | while($line = ) 10 | { 11 | if($line =~ /Case/) { print OUT ("1"); $case=1; } 12 | elsif($line =~ /Control/) { print OUT ("0"); $control=1; } 13 | else { print OUT ("9"); } 14 | } 15 | print OUT ("\n"); 16 | unless($case) { print("WARNING: no cases\n"); } 17 | unless($control) { print("WARNING: no controls\n"); } 18 | 19 | -------------------------------------------------------------------------------- /convertf/par.ANCESTRYMAP.EIGENSTRAT: -------------------------------------------------------------------------------- 1 | genotypename: example.ancestrymapgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: EIGENSTRAT 5 | genotypeoutname: example.eigenstratgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /convertf/par.ANCESTRYMAP.TGENO: -------------------------------------------------------------------------------- 1 | genotypename: example.ancestrymapgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: TGENO 5 | genotypeoutname: example.tgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /convertf/par.EIGENSTRAT.PED: 
-------------------------------------------------------------------------------- 1 | genotypename: example.eigenstratgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: PED 5 | genotypeoutname: example.ped 6 | snpoutname: example.pedsnp 7 | indivoutname: example.pedind 8 | -------------------------------------------------------------------------------- /convertf/par.EIGENSTRAT.TGENO: -------------------------------------------------------------------------------- 1 | genotypename: example.eigenstratgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: TGENO 5 | genotypeoutname: example.tgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /convertf/par.PACKEDANCESTRYMAP.ANCESTRYMAP: -------------------------------------------------------------------------------- 1 | genotypename: example.packedancestrymapgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: ANCESTRYMAP 5 | genotypeoutname: example.ancestrymapgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /convertf/par.PACKEDPED.PACKEDANCESTRYMAP: -------------------------------------------------------------------------------- 1 | genotypename: example.bed 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or example.ped, either works 4 | outputformat: PACKEDANCESTRYMAP 5 | genotypeoutname: example.packedancestrymapgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /convertf/par.PED.EIGENSTRAT: -------------------------------------------------------------------------------- 1 | genotypename: example.ped 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or 
example.ped, either works 4 | outputformat: EIGENSTRAT 5 | genotypeoutname: example.eigenstratgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /convertf/par.PED.PACKEDPED: -------------------------------------------------------------------------------- 1 | genotypename: example.ped 2 | snpname: example.pedsnp # or example.map, either works 3 | indivname: example.pedind # or example.ped, either works 4 | outputformat: PACKEDPED 5 | genotypeoutname: example.bed 6 | snpoutname: example.pedsnp 7 | indivoutname: example.pedind 8 | familynames: NO 9 | -------------------------------------------------------------------------------- /convertf/par.TGENO.EIGENSTRAT: -------------------------------------------------------------------------------- 1 | genotypename: example.tgeno 2 | snpname: example.snp 3 | indivname: example.ind 4 | outputformat: EIGENSTRAT 5 | genotypeoutname: example.eigenstratgeno 6 | snpoutname: example.snp 7 | indivoutname: example.ind 8 | -------------------------------------------------------------------------------- /examples.qpGraph/gr1triv: -------------------------------------------------------------------------------- 1 | root R 2 | label Out O 3 | label A A 4 | label B B 5 | label C C 6 | label X X 7 | edge q R Q 8 | edge o R O 9 | edge cc Q CC 10 | edge ab Q AB 11 | edge c CC C 12 | ##edge ca CC CA 13 | edge a AB A 14 | edge bb AB BB 15 | edge b BB B 16 | ##edge ba BB BA 17 | edge bx BB XX 18 | ##admix XX BB CC 70 30 19 | edge x XX X 20 | -------------------------------------------------------------------------------- /examples.qpGraph/gr1x: -------------------------------------------------------------------------------- 1 | root R 2 | label Out O 3 | ## label labels populations (leaf nodes); here population is "Out", vertex is "O" 4 | label A A 5 | label B B 6 | label C C 7 | label X X 8 | edge q R Q 9 | ## edge name q, R -> Q 10
| edge o R O 11 | edge cc Q CC 12 | edge ab Q AB 13 | edge c CC C 14 | ##edge ca CC CA 15 | edge a AB A 16 | edge bb AB BB 17 | edge b BB B 18 | ##edge ba BB BA 19 | admix XX BB CC 70 30 20 | ## XX is admixture node BB , CC ..> XX with mixing proportions .7 .3 21 | ### qpGraph normalizes weights to sum to 1 22 | edge x XX X 23 | -------------------------------------------------------------------------------- /examples.qpGraph/gr1x.oroot: -------------------------------------------------------------------------------- 1 | vertex O 0 2 | vertex R 0 3 | vertex Q 0 4 | vertex CC 0 5 | vertex AB 0 6 | vertex A 0 7 | vertex C 0 8 | vertex BB 0 9 | vertex B 0 10 | vertex XX 0 11 | vertex X 0 12 | label O Out 13 | label A A 14 | label C C 15 | label B B 16 | label X X 17 | ledge o O R 0.000 18 | ledge q R Q 0.000 19 | ledge cc Q CC 0.000 20 | redge ab Q AB 0.000 21 | ledge c CC C 0.000 22 | ledge a AB A 0.000 23 | redge bb AB BB 0.000 24 | ledge b BB B 0.000 25 | ledge x XX X 0.000 26 | admix XX BB CC 0.700 0.300 27 | -------------------------------------------------------------------------------- /examples.qpGraph/gr1x:log:nullout: -------------------------------------------------------------------------------- 1 | parameter file: par:sim1:gr1x:nullout 2 | -------------------------------------------------------------------------------- /examples.qpGraph/par:sim1:gr1triv: -------------------------------------------------------------------------------- 1 | DIR: ../data 2 | S1: sim1 3 | indivname: DIR/S1.ind 4 | snpname: DIR/S1.snp 5 | genotypename: DIR/S1.geno 6 | outpop: Out 7 | blgsize: 0.005 8 | ## block size in Morgans for Jackknife 9 | lsqmode: YES 10 | diag: .0001 11 | hires: YES 12 | -------------------------------------------------------------------------------- /examples.qpGraph/par:sim1:gr1x: -------------------------------------------------------------------------------- 1 | DIR: ../data 2 | S1: sim1 3 | indivname: DIR/S1.ind 4 | snpname: DIR/S1.snp 5 | 
genotypename: DIR/S1.geno 6 | outpop: Out 7 | blgsize: 0.005 8 | ## block size in Morgans for Jackknife 9 | lsqmode: YES 10 | diag: .0001 11 | hires: YES 12 | -------------------------------------------------------------------------------- /examples.qpGraph/par:sim1:gr1x:2: -------------------------------------------------------------------------------- 1 | DIR: ../data 2 | S1: sim1 3 | indivname: DIR/S1.ind 4 | snpname: DIR/S1.snp 5 | genotypename: DIR/S1.geno 6 | outpop: Out 7 | blgsize: 0.005 8 | ## block size in Morgans for Jackknife 9 | lsqmode: NO 10 | diag: .0001 11 | hires: YES 12 | ## higher precision on output 13 | -------------------------------------------------------------------------------- /examples.qpGraph/par:sim1:gr1x:nullout: -------------------------------------------------------------------------------- 1 | DIR: ../data 2 | S1: sim1 3 | indivname: DIR/S1.ind 4 | snpname: DIR/S1.snp 5 | genotypename: DIR/S1.geno 6 | outpop: NULL 7 | blgsize: 0.005 8 | ## block size in Morgans for Jackknife 9 | lsqmode: YES 10 | diag: .0001 11 | hires: YES 12 | -------------------------------------------------------------------------------- /examples.qpGraph/parsimp.templ: -------------------------------------------------------------------------------- 1 | DIR: ../data 2 | S1: TAG 3 | indivname: DIR/S1.ind 4 | snpname: DIR/S1.snp 5 | genotypename: DIR/S1.geno 6 | outpop: Out 7 | blgsize: 0.005 8 | ## block size in Morgans for Jackknife 9 | lsqmode: YES 10 | diag: .0001 11 | hires: YES 12 | -------------------------------------------------------------------------------- /examples.qpGraph/parsimp2.templ: -------------------------------------------------------------------------------- 1 | DIR: ../data 2 | S1: TAG 3 | indivname: DIR/S1.ind 4 | snpname: DIR/S1.snp 5 | genotypename: DIR/S1.geno 6 | outpop: Out 7 | blgsize: 0.005 8 | ## block size in Morgans for Jackknife 9 | lsqmode: NO 10 | diag: .0001 11 | hires: YES 12 | ## higher precision on output 13 | 
-------------------------------------------------------------------------------- /examples.qpGraph/runit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ../bin/qpGraph -p par:sim1:gr1x -g gr1x -o sim1:gr1x.ggg -d sim1:gr1x.dot > gr1x:log 3 | ## lsqmode YES 4 | ../bin/qpGraph -p par:sim1:gr1triv -g gr1triv -o sim1:gr1triv.ggg -d sim1:gr1triv.dot > gr1triv:log 5 | ## graph that doesn't fit 6 | ../bin/qpGraph -p par:sim1:gr1x:2 -g gr1x -o sim1:gr1x:2.ggg -d sim1:gr1x:2.dot > gr1x:2:log 7 | ## lsqmode NO Little difference on such a small problem 8 | ../bin/qpGraph -p par:sim1:gr1x:nullout -g gr1x > gr1x:nullout:log 9 | ## outpop set to NULL 10 | -------------------------------------------------------------------------------- /examples.qpGraph/runqpgf: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/perl -w 2 | 3 | ($TAG, $GR) = @ARGV ; 4 | $GX = "$TAG:$GR" ; 5 | system "m4 -D \"TAG=$TAG\" < parsimp.templ > par:$GX" ; 6 | $cmd = " ../bin/qpGraph -p par:$GX -g $GR -o $GX.ggg -d $GX.dot > $GR:log" ; 7 | print "$cmd\n" ; 8 | system "$cmd" ; 9 | -------------------------------------------------------------------------------- /examples.qpGraph/runqpgf2: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/perl -w 2 | 3 | ($TAG, $GR) = @ARGV ; 4 | $GX = "$TAG:$GR:2" ; 5 | system "m4 -D \"TAG=$TAG\" < parsimp2.templ > par:$GX" ; 6 | $cmd = " ../bin/qpGraph -p par:$GX -g $GR -o $GX.ggg -d $GX.dot > $GR:2:log" ; 7 | print "$cmd\n" ; 8 | system "$cmd" ; 9 | -------------------------------------------------------------------------------- /examples.qpGraph/runqpreroot: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ../bin/qpreroot -g gr1x -o gr1x.oroot -d gr1x.oroot.dot -r O 4 | ../bin/qpreroot -g gr1x -d gr1x.dot 5 | 
-------------------------------------------------------------------------------- /examples/README: -------------------------------------------------------------------------------- 1 | Download data directory from 2 | Reich Lab web page. datasets :: Affymmetrix Human Origins (2012) 3 | -------------------------------------------------------------------------------- /examples/expfit_Uygur.flog: -------------------------------------------------------------------------------- 1 | Summary of fit: 2 | 3 | Formula: wcorr ~ (C + A * exp(-m * dist/100)) 4 | 5 | Parameters: 6 | Estimate Std. Error t value Pr(>|t|) 7 | C 6.191e-03 4.463e-04 13.87 <2e-16 *** 8 | A 9.607e-02 2.451e-03 39.19 <2e-16 *** 9 | m 2.267e+01 8.712e-01 26.02 <2e-16 *** 10 | --- 11 | Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 12 | 13 | Residual standard error: 0.006619 on 393 degrees of freedom 14 | 15 | Number of iterations to convergence: 2 16 | Achieved convergence tolerance: 3.786e-06 17 | 18 | mean (generations): 22.667475 19 | -------------------------------------------------------------------------------- /examples/gr1x:2:log: -------------------------------------------------------------------------------- 1 | parameter file: par:sim1:gr1x:2 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data 5 | S1: sim1 6 | indivname: ../data/sim1.ind 7 | snpname: ../data/sim1.snp 8 | genotypename: ../data/sim1.geno 9 | outpop: Out 10 | blgsize: 0.005 11 | forcezmode: YES 12 | lsqmode: NO 13 | diag: .0001 14 | bigiter: 6 15 | hires: YES 16 | ## qpGraph version: 5011 17 | outpop: Out 18 | seed: 1253200593 19 | Out 20 | A 21 | B 22 | C 23 | X 24 | *** warning. 
genetic distances are in cM not Morgans 25 | SNP:10002 1 100.010000 100010000 C T 26 | 27 | packed geno read OK 28 | end of inpack 29 | 30 | graph: gr1x 31 | 32 | 33 | 0 Out 34 | 1 A 35 | 2 B 36 | 3 C 37 | 4 X 38 | root label: :::C 39 | outpop: Out 40 | population: 0 Out 50 41 | population: 1 A 50 42 | population: 2 B 50 43 | population: 3 C 50 44 | population: 4 X 50 45 | before setwt numsnps: 50000 outpop: Out 46 | setwt numsnps: 48472 47 | number of blocks for moving block jackknife: 987 48 | snps: 48472 indivs: 250 49 | lambdascale: 0.473 50 | fst: 51 | O A B C X 52 | O 0 93 93 94 80 53 | A 93 0 49 72 50 54 | B 93 49 0 73 43 55 | C 94 72 73 0 24 56 | X 80 50 43 24 0 57 | 58 | f2: 59 | O A B C X 60 | O 0 101 103 104 88 61 | A 101 0 46 69 47 62 | B 103 46 0 70 41 63 | C 104 69 70 0 22 64 | X 88 47 41 22 0 65 | 66 | ff3: 67 | O A B C X 68 | O 0 0 0 0 0 69 | A 0 101 79 68 71 70 | B 0 79 103 68 75 71 | C 0 68 68 104 85 72 | X 0 71 75 85 88 73 | 74 | ff3sig*10: 75 | O A B C X 76 | O 0 0 0 0 0 77 | A 0 14 12 11 10 78 | B 0 12 15 11 11 79 | C 0 11 11 15 12 80 | X 0 10 11 12 12 81 | 82 | starting analysis 83 | initial admix wts: score: 0.216 84 | initmixwts: :X XX BB CC 0.301 0.699 85 | 86 | 87 | initial score: 0.216 88 | zzgopt 89 | init vg: 90 | 0.301 91 | gslsetup called 92 | gslans: 13 0.215537 93 | 0.301 94 | final vg: 95 | 0.301 96 | 97 | score: 0.216 dof: 0 nominal p-val 1.000000 constraint: 0.000000 98 | ff3fit: 99 | O A B C X 100 | O 0 0 0 0 0 101 | A 0 102 79 68 71 102 | B 0 79 103 68 75 103 | C 0 68 68 104 85 104 | X 0 71 75 85 88 105 | 106 | ff3diff: 107 | O A B C X 108 | O 0 0 0 0 0 109 | A 0 0 0 0 0 110 | B 0 0 0 0 0 111 | C 0 0 0 0 0 112 | X 0 0 0 0 0 113 | 114 | mix: :X XX BB CC 0.301 0.699 115 | 116 | q o cc ab c a bb b x 117 | 0.0682 0.0000 0.0239 0.0110 0.0117 0.0223 0.0109 0.0125 0.0063 118 | 119 | 120 | fst: fitted estim diff std. 
err Z score 121 | Out A f2: 0.101528 0.101337 -0.000191 0.001380 -0.138 122 | Out B f2: 0.102624 0.102894 0.000271 0.001461 0.185 123 | Out C f2: 0.103762 0.103780 0.000018 0.001470 0.013 124 | Out X f2: 0.088087 0.088152 0.000065 0.001161 0.056 125 | A B f2: 0.045718 0.045754 0.000037 0.000669 0.055 126 | A C f2: 0.068953 0.069056 0.000103 0.000868 0.118 127 | A X f2: 0.046636 0.046674 0.000038 0.000603 0.063 128 | B C f2: 0.070049 0.070019 -0.000030 0.000936 -0.032 129 | B X f2: 0.041161 0.041172 0.000011 0.000597 0.018 130 | C X f2: 0.022109 0.022114 0.000005 0.000308 0.015 131 | 132 | 133 | outliers: 134 | 135 | graphdotname: sim1:gr1x:2.dot 136 | ## end of run 137 | -------------------------------------------------------------------------------- /examples/gr1x:log: -------------------------------------------------------------------------------- 1 | parameter file: par:sim1:gr1x 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data 5 | S1: sim1 6 | indivname: ../data/sim1.ind 7 | snpname: ../data/sim1.snp 8 | genotypename: ../data/sim1.geno 9 | outpop: Out 10 | blgsize: 0.005 11 | forcezmode: YES 12 | lsqmode: YES 13 | diag: .0001 14 | bigiter: 6 15 | hires: YES 16 | ## qpGraph version: 5011 17 | outpop: Out 18 | seed: 1252805837 19 | simple lsqmode 20 | Out 21 | A 22 | B 23 | C 24 | X 25 | *** warning. 
genetic distances are in cM not Morgans 26 | SNP:10002 1 100.010000 100010000 C T 27 | 28 | packed geno read OK 29 | end of inpack 30 | 31 | graph: gr1x 32 | 33 | 34 | 0 Out 35 | 1 A 36 | 2 B 37 | 3 C 38 | 4 X 39 | root label: :::C 40 | outpop: Out 41 | population: 0 Out 50 42 | population: 1 A 50 43 | population: 2 B 50 44 | population: 3 C 50 45 | population: 4 X 50 46 | before setwt numsnps: 50000 outpop: Out 47 | setwt numsnps: 48472 48 | number of blocks for moving block jackknife: 987 49 | snps: 48472 indivs: 250 50 | lambdascale: 0.473 51 | fst: 52 | O A B C X 53 | O 0 93 93 94 80 54 | A 93 0 49 72 50 55 | B 93 49 0 73 43 56 | C 94 72 73 0 24 57 | X 80 50 43 24 0 58 | 59 | f2: 60 | O A B C X 61 | O 0 101 103 104 88 62 | A 101 0 46 69 47 63 | B 103 46 0 70 41 64 | C 104 69 70 0 22 65 | X 88 47 41 22 0 66 | 67 | ff3: 68 | O A B C X 69 | O 0 0 0 0 0 70 | A 0 101 79 68 71 71 | B 0 79 103 68 75 72 | C 0 68 68 104 85 73 | X 0 71 75 85 88 74 | 75 | ff3sig*10: 76 | O A B C X 77 | O 0 0 0 0 0 78 | A 0 14 12 11 10 79 | B 0 12 15 11 11 80 | C 0 11 11 15 12 81 | X 0 10 11 12 12 82 | 83 | starting analysis 84 | initial admix wts: score: 0.038 85 | initmixwts: :X XX BB CC 0.293 0.707 86 | 87 | 88 | initial score: 0.038 89 | zzgopt 90 | init vg: 91 | 0.293 92 | gslsetup called 93 | gslans: 13 0.038084 94 | 0.291 95 | final vg: 96 | 0.291 97 | 98 | score: 0.038 dof: 0 nominal p-val 1.000000 constraint: 0.000000 99 | ff3fit: 100 | O A B C X 101 | O 0 0 0 0 0 102 | A 0 101 79 68 71 103 | B 0 79 103 68 75 104 | C 0 68 68 104 85 105 | X 0 71 75 85 88 106 | 107 | ff3diff: 108 | O A B C X 109 | O 0 0 0 0 0 110 | A 0 0 0 0 0 111 | B 0 0 0 0 0 112 | C 0 0 0 0 0 113 | X 0 0 0 0 0 114 | 115 | mix: :X XX BB CC 0.291 0.709 116 | 117 | q o cc ab c a bb b x 118 | 0.0682 0.0000 0.0236 0.0111 0.0120 0.0221 0.0121 0.0115 0.0061 119 | 120 | 121 | fst: fitted estim diff std. 
err Z score 122 | Out A f2: 0.101331 0.101337 0.000006 0.001380 0.004 123 | Out B f2: 0.102891 0.102894 0.000004 0.001461 0.002 124 | Out C f2: 0.103777 0.103780 0.000004 0.001470 0.003 125 | Out X f2: 0.088151 0.088152 0.000001 0.001161 0.001 126 | A B f2: 0.045736 0.045754 0.000018 0.000669 0.028 127 | A C f2: 0.068740 0.069056 0.000316 0.000868 0.364 128 | A X f2: 0.046667 0.046674 0.000007 0.000603 0.011 129 | B C f2: 0.070299 0.070019 -0.000281 0.000936 -0.300 130 | B X f2: 0.041167 0.041172 0.000005 0.000597 0.008 131 | C X f2: 0.022114 0.022114 -0.000000 0.000308 -0.000 132 | 133 | 134 | outliers: 135 | 136 | graphdotname: sim1:gr1x.dot 137 | ## end of run 138 | -------------------------------------------------------------------------------- /examples/left1: -------------------------------------------------------------------------------- 1 | Corded_Ware_LN 2 | WHG 3 | LBK_EN 4 | Yamnaya 5 | -------------------------------------------------------------------------------- /examples/list_qp3Pop: -------------------------------------------------------------------------------- 1 | French Japanese Uygur 2 | French Han Uygur 3 | Russian Japanese Uygur 4 | Russian Han Uygur 5 | Italian Japanese Uygur 6 | Italian Han Uygur 7 | -------------------------------------------------------------------------------- /examples/list_qp4diff: -------------------------------------------------------------------------------- 1 | Yoruba French Han Uygur :: Yoruba French Japanese Uygur 2 | -------------------------------------------------------------------------------- /examples/list_qpBound: -------------------------------------------------------------------------------- 1 | ## Columns copied from result lines of qp3Pop 2 | French Japanese Uygur -0.025170 -66.384 3 | French Han Uygur -0.025172 -67.090 4 | Russian Japanese Uygur -0.021377 -59.897 5 | Russian Han Uygur -0.021403 -58.920 6 | Italian Japanese Uygur -0.025345 -64.880 7 | Italian Han Uygur -0.025102 -62.757 8 | 
-------------------------------------------------------------------------------- /examples/list_qpDpart: -------------------------------------------------------------------------------- 1 | Chimp Yoruba French Han Uygur 2 | Chimp Yoruba French Japanese Uygur 3 | -------------------------------------------------------------------------------- /examples/list_qpDstat: -------------------------------------------------------------------------------- 1 | Yoruba 2 | French 3 | Han 4 | Uygur 5 | -------------------------------------------------------------------------------- /examples/list_qpDstat1: -------------------------------------------------------------------------------- 1 | Yoruba French Han Uygur 2 | Yoruba French Japanese Uygur 3 | -------------------------------------------------------------------------------- /examples/list_qpF4ratio: -------------------------------------------------------------------------------- 1 | Yoruba Papuan : Uygur Han :: Yoruba Papuan : French Han 2 | Yoruba Adygei : Uygur Han :: Yoruba Adygei : French Han 3 | -------------------------------------------------------------------------------- /examples/mklog: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/perl -w 2 | 3 | ## won't work unless ../data installed 4 | $IN = "plist" ; 5 | open (FF, "$IN") || die "badly\n" ; 6 | 7 | foreach $p () { 8 | chomp $p ; 9 | $par = "par" . 
$p ; 10 | $log = "$p.log" ; 11 | $pp = $p ; 12 | $pp =~ s/stat1/stat/ ; 13 | $cmd = "../bin/$pp -p $par > $log" ; 14 | print "$cmd\n" ; 15 | system "$cmd" ; 16 | } 17 | -------------------------------------------------------------------------------- /examples/parRolloff: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | poplistname: rolloff.ref 7 | admixlist: rolloff.target 8 | binsize: .001 9 | output: Uygur 10 | maxdis: 0.4 11 | seed: 77 12 | runmode: 1 13 | mincount: 4 14 | minparentcount: 0 15 | jackknife: YES 16 | -------------------------------------------------------------------------------- /examples/parqp3Pop: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | popfilename: list_qp3Pop 7 | -------------------------------------------------------------------------------- /examples/parqp4diff: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | popfilename: list_qp4diff 7 | -------------------------------------------------------------------------------- /examples/parqp4diffa: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | popfilename: list_qp4diff 7 | firstf4mult: 1.2 8 | -------------------------------------------------------------------------------- /examples/parqpAdm: -------------------------------------------------------------------------------- 1 | DIR: ../data 2 | S1: qpdata 3 | indivname: DIR/S1.ind 4 | snpname: DIR/S1.snp 5 | genotypename: DIR/S1.geno 6 | 
popleft: left1 7 | popright: right1 8 | details: YES ## default NO 9 | -------------------------------------------------------------------------------- /examples/parqpBound: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | popfilename: list_qpBound 7 | ## popfilename: contains output of qp3Pop (f_3 and Z-score) for the populations of interest. 8 | outpop: Yoruba 9 | ## outpop MUST be present 10 | 11 | -------------------------------------------------------------------------------- /examples/parqpDpart: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | popfilename: list_qpDpart 7 | pattern: AABAB : AABBA 8 | -------------------------------------------------------------------------------- /examples/parqpDstat: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | poplistname: list_qpDstat 7 | -------------------------------------------------------------------------------- /examples/parqpDstat1: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | popfilename: list_qpDstat1 7 | -------------------------------------------------------------------------------- /examples/parqpF4ratio: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | popfilename: list_qpF4ratio 7 | 
-------------------------------------------------------------------------------- /examples/parqpWave: -------------------------------------------------------------------------------- 1 | DIR: ../data 2 | S1: qpdata 3 | indivname: DIR/S1.ind 4 | snpname: DIR/S1.snp 5 | genotypename: DIR/S1.geno 6 | popleft: left1 7 | popright: right1 8 | -------------------------------------------------------------------------------- /examples/parqpff3: -------------------------------------------------------------------------------- 1 | DIR: ../data/ 2 | SSS: allmap 3 | indivname: DIR/SSS.ind 4 | snpname: DIR/SSS.snp 5 | genotypename: DIR/SSS.geno 6 | poplistname: list_qpDstat 7 | -------------------------------------------------------------------------------- /examples/plist: -------------------------------------------------------------------------------- 1 | qpF4ratio 2 | qp3Pop 3 | qpBound 4 | qpDstat1 5 | qpDstat 6 | qpWave 7 | qpAdm 8 | -------------------------------------------------------------------------------- /examples/q1: -------------------------------------------------------------------------------- 1 | parameter file: parqp4diff 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | popfilename: list_qp4diff 10 | ## qp4diff version: 450 11 | fancyf4 off 12 | packed geno read OK 13 | end of inpack 14 | nplist: 1 15 | 0 Yoruba 21 16 | 1 French 26 17 | 2 Han 32 18 | 3 Uygur 9 19 | 4 Japanese 28 20 | jackknife block size: 0.050 21 | snps: 621037 indivs: 116 22 | number of blocks for block jackknife: 711 23 | result: Yoruba French Han Uygur : Yoruba French Japanese Uygur 0.000088 0.000059 1.488 621025 24 | ##end of qp4diff: 19.710 seconds cpu 447.386 Mbytes in use 25 | -------------------------------------------------------------------------------- /examples/q2: 
-------------------------------------------------------------------------------- 1 | parameter file: parqp4diffa 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | popfilename: list_qp4diff 10 | firstf4mult: 1.2 11 | ## qp4diff version: 450 12 | fancyf4 off 13 | *** firstf4mult: 1.200 *** 14 | packed geno read OK 15 | end of inpack 16 | nplist: 1 17 | 0 Yoruba 21 18 | 1 French 26 19 | 2 Han 32 20 | 3 Uygur 9 21 | 4 Japanese 28 22 | jackknife block size: 0.050 23 | snps: 621037 indivs: 116 24 | number of blocks for block jackknife: 711 25 | result: Yoruba French Han Uygur : Yoruba French Japanese Uygur 0.000869 0.000069 12.676 621025 26 | ##end of qp4diff: 21.140 seconds cpu 447.386 Mbytes in use 27 | -------------------------------------------------------------------------------- /examples/qp3Pop.log: -------------------------------------------------------------------------------- 1 | parameter file: parqp3Pop 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | popfilename: list_qp3Pop 10 | ## qp3Pop version: 300 11 | packed geno read OK 12 | nplist: 6 13 | number of blocks for block jackknife: 711 14 | snps: 621038 15 | Source 1 Source 2 Target f_3 std. 
err Z SNPs 16 | result: French Japanese Uygur -0.025170 0.000379 -66.384 428873 17 | result: French Han Uygur -0.025172 0.000375 -67.090 429972 18 | result: Russian Japanese Uygur -0.021377 0.000357 -59.897 428523 19 | result: Russian Han Uygur -0.021403 0.000363 -58.920 429573 20 | result: Italian Japanese Uygur -0.025345 0.000391 -64.880 424716 21 | result: Italian Han Uygur -0.025102 0.000400 -62.757 425595 22 | ##end of qp3Pop 23 | -------------------------------------------------------------------------------- /examples/qp4diff.log: -------------------------------------------------------------------------------- 1 | parameter file: parqp4diff 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | popfilename: list_qp4diff 10 | ## qp4diff version: 340 11 | packed geno read OK 12 | end of inpack 13 | nplist: 1 14 | 0 Yoruba 21 15 | 1 French 26 16 | 2 Han 32 17 | 3 Uygur 9 18 | 4 Japanese 28 19 | jackknife block size: 0.050 20 | snps: 621037 indivs: 116 21 | number of blocks for moving block jackknife: 711 22 | result: Yoruba French Han Uygur : Yoruba French Japanese Uygur 0.000088 0.000059 1.488 23 | ##end of qp4diff: 17.700 seconds cpu 447.402 Mbytes in use 24 | -------------------------------------------------------------------------------- /examples/qpBound.log: -------------------------------------------------------------------------------- 1 | parameter file: parqpBound 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | popfilename: list_qpBound 10 | outpop: Yoruba 11 | ## qpBound version: 120 12 | packed geno read OK 13 | nplist: 6 14 | French Japanese Uygur -0.025170 -66.384 0.447 0.531 15 | French Han Uygur -0.025172 -67.090 0.437 0.527 16 | Russian Japanese 
Uygur -0.021377 -59.897 0.544 0.587 17 | Russian Han Uygur -0.021403 -58.920 0.533 0.583 18 | Italian Japanese Uygur -0.025345 -64.880 0.422 0.521 19 | Italian Han Uygur -0.025102 -62.757 0.411 0.520 20 | ##end of qpBound 21 | -------------------------------------------------------------------------------- /examples/qpDstat.log: -------------------------------------------------------------------------------- 1 | parameter file: parqpDstat 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | poplistname: list_qpDstat 10 | ## qpDstat version: 662 11 | packed geno read OK 12 | 0 Yoruba 21 13 | 1 French 26 14 | 2 Han 32 15 | 3 Uygur 9 16 | jackknife block size: 0.050 17 | nodata: Affx-50079445 18 | nodata: Affx-36048139 19 | nodata: Affx-50056055 20 | snps: 621035 indivs: 88 21 | number of blocks for jackknife: 711 22 | nrows, ncols: 88 621035 23 | result: Yoruba French Han Uygur 0.0410 32.300 - 30753 28328 621026 24 | result: Yoruba Han French Uygur 0.0865 51.447 - 33690 28328 621026 25 | result: Yoruba Uygur French Han 0.0456 22.403 best 33690 30753 621026 26 | ## end of run 27 | -------------------------------------------------------------------------------- /examples/qpDstat1.log: -------------------------------------------------------------------------------- 1 | parameter file: parqpDstat1 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | popfilename: list_qpDstat1 10 | ## qpDstat version: 662 11 | packed geno read OK 12 | number of quadruples 2 13 | 0 Yoruba 21 14 | 1 French 26 15 | 2 Han 32 16 | 3 Japanese 28 17 | 4 Uygur 9 18 | jackknife block size: 0.050 19 | nodata: Affx-50079445 20 | snps: 621037 indivs: 116 21 | number of blocks for jackknife: 711 22 | nrows, 
ncols: 116 621037 23 | result: Yoruba French Han Uygur 0.0410 32.300 30753 28328 621026 24 | result: Yoruba French Japanese Uygur 0.0402 31.016 30690 28320 621026 25 | ## end of run 26 | -------------------------------------------------------------------------------- /examples/qpF4ratio.log: -------------------------------------------------------------------------------- 1 | parameter file: parqpF4ratio 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | popfilename: list_qpF4ratio 10 | ## qpF4ratio version: 300 11 | packed geno read OK 12 | nplist: 2 13 | 0 Yoruba 21 14 | 1 Papuan 12 15 | 2 Uygur 9 16 | 3 Han 32 17 | 4 French 26 18 | 5 Adygei 15 19 | jackknife block size: 0.050 20 | snps: 621038 indivs: 115 21 | number of blocks for block jackknife: 711 22 | alpha std. err Z (null=0) 23 | result: Yoruba Papuan Uygur Han : Yoruba Papuan French Han 0.467437 0.015800 29.585 24 | result: Yoruba Adygei Uygur Han : Yoruba Adygei French Han 0.474091 0.011546 41.060 25 | ## end of run 26 | -------------------------------------------------------------------------------- /examples/qpWave.log: -------------------------------------------------------------------------------- 1 | parameter file: parqpWave 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data 5 | S1: qpdata 6 | indivname: ../data/qpdata.ind 7 | snpname: ../data/qpdata.snp 8 | genotypename: ../data/qpdata.geno 9 | popleft: left1 10 | popright: right1 11 | ## qpWave version: 201 12 | 13 | left pops: 14 | Corded_Ware_LN 15 | WHG 16 | LBK_EN 17 | Yamnaya 18 | 19 | right pops: 20 | Han 21 | Eskimo 22 | Mbuti 23 | Karitiana 24 | Ulchi 25 | 0 Corded_Ware_LN 4 26 | 1 WHG 2 27 | 2 LBK_EN 12 28 | 3 Yamnaya 9 29 | 4 Han 33 30 | 5 Eskimo 22 31 | 6 Mbuti 10 32 | 7 Karitiana 12 33 | 8 Ulchi 25 34 | jackknife block size: 0.050 35 | snps: 354212 indivs: 129 36 
| number of blocks for block jackknife: 710 37 | dof (jackknife): 606.426 38 | numsnps used: 351683 39 | f4info: 40 | f4rank: 0 dof: 12 chisq: 460.444 tail: 5.71351953e-91 dofdiff: 0 chisqdiff: 0.000 taildiff: 1 41 | 42 | f4info: 43 | f4rank: 1 dof: 6 chisq: 24.489 tail: 0.00042443713 dofdiff: 6 chisqdiff: 435.955 taildiff: 5.16806732e-91 44 | B: 45 | scale 1.000 46 | Eskimo 0.966 47 | Mbuti -1.099 48 | Karitiana 1.342 49 | Ulchi 0.246 50 | A: 51 | scale 1211.444 52 | WHG -0.427 53 | LBK_EN -1.594 54 | Yamnaya 0.525 55 | 56 | f4info: 57 | f4rank: 2 dof: 2 chisq: 0.536 tail: 0.764859692 dofdiff: 4 chisqdiff: 23.953 taildiff: 8.16355044e-05 58 | B: 59 | scale 1.000 1.000 60 | Eskimo 1.021 0.540 61 | Mbuti -0.990 1.738 62 | Karitiana 1.387 0.830 63 | Ulchi 0.233 0.001 64 | A: 65 | scale 1202.997 3105.760 66 | WHG -0.477 -1.556 67 | LBK_EN -1.581 0.634 68 | Yamnaya 0.522 0.420 69 | 70 | f4info: 71 | f4rank: 3 dof: 0 chisq: 0.000 tail: 1 dofdiff: 2 chisqdiff: 0.536 taildiff: 0.764859692 72 | B: 73 | scale 1.000 1.000 1.000 74 | Eskimo 0.988 0.490 1.642 75 | Mbuti -0.992 1.730 0.101 76 | Karitiana 1.409 0.875 -1.116 77 | Ulchi 0.231 -0.005 0.218 78 | A: 79 | scale 1193.784 3109.317 29979.194 80 | WHG -0.493 -1.570 0.541 81 | LBK_EN -1.584 0.614 0.338 82 | Yamnaya 0.498 0.398 1.610 83 | 84 | ## end of run 85 | -------------------------------------------------------------------------------- /examples/right1: -------------------------------------------------------------------------------- 1 | Han 2 | Eskimo 3 | Mbuti 4 | Karitiana 5 | ##Kharia 6 | ##Onge 7 | Ulchi 8 | -------------------------------------------------------------------------------- /examples/rolloff.par: -------------------------------------------------------------------------------- 1 | DIR: /groups/reich/eleftheria/AdmixTools/ 2 | indivname: DIR/majfas_mindepth_3_compiled_0.1_eigenstrat.ind 3 | snpname: DIR/majfas_mindepth_3_compiled_0.1_eigenstrat_mod.snp 4 | genotypename: 
DIR/majfas_mindepth_3_compiled_0.1_eigenstrat.geno 5 | poplistname: elephant.ref 6 | admixlist: elephant.target 7 | binsize: .001 8 | output: Uygur 9 | maxdis: 0.4 10 | seed: 77 11 | runmode: 1 12 | mincount: 4 13 | minparentcount: 0 14 | jackknife: YES 15 | chithresh: 6.000 16 | numchrom: 28 17 | checkmap: NO 18 | -------------------------------------------------------------------------------- /examples/rolloff.ref: -------------------------------------------------------------------------------- 1 | French 2 | Han 3 | -------------------------------------------------------------------------------- /examples/rolloff.target: -------------------------------------------------------------------------------- 1 | Uygur 2 | -------------------------------------------------------------------------------- /examples/xx:qp4diff: -------------------------------------------------------------------------------- 1 | parameter file: parqp4diff 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | popfilename: list_qp4diff 10 | ## qp4diff version: 340 11 | packed geno read OK 12 | end of inpack 13 | nplist: 1 14 | 0 Yoruba 21 15 | 1 French 26 16 | 2 Han 32 17 | 3 Uygur 9 18 | 4 Japanese 28 19 | jackknife block size: 0.050 20 | snps: 621037 indivs: 116 21 | number of blocks for moving block jackknife: 711 22 | result: Yoruba French Han Uygur : Yoruba French Japanese Uygur 0.000088 0.000059 1.488 23 | ##end of qp4diff: 17.700 seconds cpu 447.402 Mbytes in use 24 | -------------------------------------------------------------------------------- /examples/xx:qp4diffa: -------------------------------------------------------------------------------- 1 | parameter file: parqp4diffa 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | 
genotypename: ../data//allmap.geno 9 | popfilename: list_qp4diff 10 | firstf4mult: 1.2 11 | ## qp4diff version: 340 12 | packed geno read OK 13 | end of inpack 14 | nplist: 1 15 | 0 Yoruba 21 16 | 1 French 26 17 | 2 Han 32 18 | 3 Uygur 9 19 | 4 Japanese 28 20 | jackknife block size: 0.050 21 | snps: 621037 indivs: 116 22 | number of blocks for moving block jackknife: 711 23 | result: Yoruba French Han Uygur : Yoruba French Japanese Uygur 0.000869 0.000069 12.676 24 | ##end of qp4diff: 17.640 seconds cpu 447.402 Mbytes in use 25 | -------------------------------------------------------------------------------- /examples/xx:qpff3: -------------------------------------------------------------------------------- 1 | parameter file: parqpff3 2 | ### THE INPUT PARAMETERS 3 | ##PARAMETER NAME: VALUE 4 | DIR: ../data/ 5 | SSS: allmap 6 | indivname: ../data//allmap.ind 7 | snpname: ../data//allmap.snp 8 | genotypename: ../data//allmap.geno 9 | poplistname: list_qpDstat 10 | ## qpff3base version: 331 11 | seed: 24920198 12 | packed geno read OK 13 | end of inpack 14 | 0 Yoruba 15 | 1 French 16 | 2 Han 17 | 3 Uygur 18 | outpop: Yoruba basepop: Yoruba 19 | nodata: Affx-50079445 20 | nodata: Affx-36048139 21 | nodata: Affx-50056055 22 | population: 0 Yoruba 21 23 | population: 1 French 26 24 | population: 2 Han 32 25 | population: 3 Uygur 9 26 | before setwt numsnps: 621038 27 | setwt numsnps: 492519 28 | number of blocks for moving block jackknife: 710 29 | snps: 492519 indivs: 88 30 | lambdascale: 0.342 31 | statistics multiplied by 1000 32 | fst: 33 | Y F H U 34 | Y 0 144 173 134 35 | F 144 0 110 34 36 | H 173 110 0 32 37 | U 134 34 32 0 38 | 39 | f2: 40 | Y F H U 41 | Y 0 153 182 142 42 | F 153 0 96 28 43 | H 182 96 0 26 44 | U 142 28 26 0 45 | 46 | ff3 (unscaled): 47 | Y F H U 48 | Y 0 0 0 0 49 | F 0 448 350 391 50 | H 0 350 534 437 51 | U 0 391 437 417 52 | number of good snps: 492519 53 | ff3: 54 | Y F H U 55 | Y 0 0 0 0 56 | F 0 153 120 134 57 | H 0 120 182 149 58 | U 0 
134 149 142 59 | 60 | ff3sig*10: 61 | Y F H U 62 | Y 0 0 0 0 63 | F 0 14 12 13 64 | H 0 12 16 14 65 | U 0 13 14 14 66 | 67 | 68 | ff3 (base: Yoruba) 69 | Y F H U 70 | Y 0 0 0 0 71 | F 0 153 120 134 72 | H 0 120 182 149 73 | U 0 134 149 142 74 | 75 | 76 | ff3 (base: French) 77 | Y F H U 78 | Y 153 0 33 19 79 | F 0 0 0 0 80 | H 33 0 96 49 81 | U 19 0 49 28 82 | 83 | 84 | ff3 (base: Han) 85 | Y F H U 86 | Y 182 63 0 33 87 | F 63 96 0 47 88 | H 0 0 0 0 89 | U 33 47 0 26 90 | 91 | 92 | ff3 (base: Uygur) 93 | Y F H U 94 | Y 142 9 -7 0 95 | F 9 28 -21 0 96 | H -7 -21 26 0 97 | U 0 0 0 0 98 | 99 | 100 | ##end of qpff3base 101 | -------------------------------------------------------------------------------- /fancy.txt: -------------------------------------------------------------------------------- 1 | Consider f4(l1, l2, r1, r2) 2 | Suppose at some snp l2 is missing, but the allele frequencies of r1, r2 match. Then whatever l2 is, f4 is zero. 3 | Similarly f4=0 if the frequencies of l1 and l2 match. 4 | 5 | qpAdm 6 | qpWave 7 | qpF4ratio 8 | qp4diff 9 | 10 | use this trick. 11 | 12 | Arie Shaus pointed out to me that this can bias f4 down toward 0. 
13 | To disable, code in the parameter file 14 | 15 | fancyf4: NO 16 | 17 | Nick 1/29/19 18 | -------------------------------------------------------------------------------- /halfscore.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/AdmixTools/b10ddcfdf43792125052c8bc8b8a96cf5335eb56/halfscore.pdf -------------------------------------------------------------------------------- /pdoc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/AdmixTools/b10ddcfdf43792125052c8bc8b8a96cf5335eb56/pdoc.pdf -------------------------------------------------------------------------------- /perlsrc/dof4: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ($par) = @ARGV ; 4 | ## $par should be of form parzzzz and list zzzzlist 5 | die "no param file\n" unless (defined $par) ; 6 | die "no param file\n" unless (-r $par) ; 7 | $ss = $par; $ss =~ s/par// ; 8 | $list = `grabpars -p $par -x popfilename: ` ; 9 | chomp $list ; 10 | die "$list not found\n" unless (-r $list) ; 11 | $nlines = `numlines $list` ; chomp $nlines ; 12 | print "nlines: $nlines\n" ; 13 | die "no data\n" unless ($nlines > 0) ; 14 | $lo = 0 ; 15 | $hi = 24 ; 16 | $num = 1000 ; ## easier to sort afterwards 17 | for (;;) { 18 | last if ($lo > $nlines) ; 19 | $out = "xx:$ss:$num" ; 20 | unlink $out ; 21 | system "bsubs -o $out MEM16 qpDstat -p $par -L $lo -H $hi" ; 22 | ++$num ; 23 | $lo += 25 ; 24 | $hi += 25 ; 25 | } 26 | -------------------------------------------------------------------------------- /perlsrc/dof4a: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ($par) = @ARGV ; 4 | ## $par should be of form parzzzz and list zzzzlist 5 | die "no param file\n" unless (defined $par) ; 6 | die "no param file\n" unless (-r $par) ; 7 | $ss = 
$par; $ss =~ s/par// ; 8 | $list = `grabpars -p $par -x popfilename: ` ; 9 | chomp $list ; 10 | die "$list not found\n" unless (-r $list) ; 11 | $nlines = `numlines $list` ; chomp $nlines ; 12 | print "nlines: $nlines\n" ; 13 | die "no data\n" unless ($nlines > 0) ; 14 | $lo = 0 ; 15 | $hi = 24 ; 16 | $num = 1000 ; ## easier to sort afterwards 17 | for (;;) { 18 | last if ($lo > $nlines) ; 19 | $out = "xx:$ss:$num" ; 20 | unlink $out ; 21 | ## system "bsubs -o $out MEM16 qpDstat -p $par -L $lo -H $hi" ; 22 | system "qpDstat -p $par -L $lo -H $hi > $out &" ; 23 | ++$num ; 24 | $lo += 25 ; 25 | $hi += 25 ; 26 | } 27 | -------------------------------------------------------------------------------- /perlsrc/fxtract: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use Getopt::Std ; 4 | 5 | getopts('p:V',\%opts) ; 6 | 7 | $parname = $opts{"p"} ; 8 | $verbose = $opts{"V"} ; 9 | 10 | die ("Usage: fxtract -p ") unless (defined $parname) ; 11 | 12 | $dir = `mktmp ddd 71` ; 13 | $tname = "tname.$$" ; 14 | $ttname = "ttname.$$" ; 15 | $pwd = `pwd` ; 16 | ### $X = "/n/groups/reich/matt/pipeline/code/nick/pull_jan08/script" ; 17 | 18 | chomp ($pwd, $dir, $tname) ; 19 | 20 | $instem = `grabpars -p $parname -x "instem:" ` ; 21 | $outstem = `grabpars -p $parname -x "outstem:" ` ; 22 | $poplist = `grabpars -p $parname -x "poplistname:" ` ; 23 | $tempdir = `grabpars -p $parname -x "tempdir:" ` ; 24 | $indlist = `grabpars -p $parname -x "indivlistname:" ` ; 25 | 26 | chomp $tempdir if (defined $tempdir) ; 27 | undef ($tempdir) if ($tempdir =~ /NOTFOUND/) ; 28 | $dir = $tempdir if (defined $tempdir) ; 29 | printf "mkdir -p $dir\n" ; 30 | mkdir $dir ; 31 | ##system "mkdir -p $dir" ; 32 | 33 | chomp ($instem, $outstem, $poplist, $indlist) ; 34 | 35 | 36 | $ilist = "fxt:$$.ind" ; 37 | $INDLIST = "$dir/$ilist" ; 38 | 39 | if ($indlist =~ /NOTFOUND/) { 40 | die "no poplist or indlist\n" if ($poplist =~ /NOTFOUND/) ; 41 | 
system "grepcolf 2 $poplist < $instem.ind | xtractcol 0 > $INDLIST" ; 42 | } 43 | else { 44 | system "grepcolf 0 $indlist < $instem.ind | xtractcol 0 > $INDLIST" ; 45 | } 46 | 47 | $S = "single_sample.py" ; 48 | $SG = "single_sample_tgeno.py" ; 49 | $M = "merge_adaptive_pulldown.py" ; 50 | 51 | open (I1, $INDLIST) || die ("bad $INDLIST\n") ; 52 | 53 | $SX = $S ; 54 | $tgfile = "$instem.tgeno" ; 55 | $SX = $SG if (-e $tgfile) ; 56 | print "$tgfile\n$SX\n" ; 57 | system "cp $instem.snp $dir/$tname.snp" ; 58 | foreach $line () { 59 | ($id) = split " ", $line ; 60 | ## system "python3 $S -i $instem -s $id -o $dir/$id >& /dev/null" ; 61 | ## system "grepcol 0 $id < $smallind > $dir.$id.ind" ; 62 | system "python3 $SX -x -i $instem -s $id -o $dir/$id " if (defined $verbose); 63 | system "python3 $SX -x -i $instem -s $id -o $dir/$id >& /dev/null" unless (defined $verbose); 64 | print "$id extracted\n" ; 65 | } 66 | 67 | close I1 ; 68 | 69 | open (I1, $INDLIST) || die ("bad $INDLIST\n") ; 70 | 71 | chdir $dir ; 72 | foreach $line () { 73 | ($id) = split " ", $line ; 74 | system "ln -f $tname.snp $id.snp" ; 75 | } 76 | 77 | system "python3 $M -f $ilist -o $ttname" ; 78 | ##print "got here\n" ; 79 | chdir $pwd ; 80 | 81 | system " cp $dir/$ttname.snp $outstem.snp" ; 82 | system " cp $dir/$ttname.ind $outstem.ind" ; 83 | system " cp $dir/$ttname.geno $outstem.geno" ; 84 | 85 | unlink $dir unless (defined $tempdir) ; 86 | -------------------------------------------------------------------------------- /perlsrc/getresult: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ($IN, $OUT, $tag) = @ARGV ; 4 | $tag = "result:" unless (defined $tag) ; 5 | system "fgrep $tag < $IN | xtractcolv 0 | mkpretty > $OUT" ; 6 | $num = `cat $OUT | wc --lines ` ; chomp $num ; 7 | print "number of results: $num\n" ; 8 | -------------------------------------------------------------------------------- /perlsrc/jackdiff: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use Getopt::Std ; 4 | getopts('a:b:o:',\%opts) ; 5 | 6 | $fa = $opts{"a"} ; 7 | $fb = $opts{"b"} ; 8 | $fout = $opts{"o"} ; 9 | 10 | die "no -a parameter\n" unless (defined $fa) ; 11 | die "no -b parameter\n" unless (defined $fb) ; 12 | 13 | open (I1, "$fa") || die "can't open $fa\n" ; 14 | open (I2, "$fb") || die "can't open $fa\n" ; 15 | 16 | $maxa = -1 ; 17 | foreach $line () { 18 | ($a, $w, $v) = split " ", $line ; 19 | next if ($a =~ /\#/) ; 20 | next unless (defined $v) ; 21 | $WA[$a] = $w ; 22 | $VA[$a] = $v ; 23 | } 24 | foreach $line () { 25 | ($a, $w, $v) = split " ", $line ; 26 | next if ($a =~ /\#/) ; 27 | next unless (defined $v) ; 28 | $WB[$a] = $w ; 29 | $VB[$a] = $v ; 30 | $maxa = $a ; 31 | } 32 | $T1 = "/tmp/t1.$$" ; 33 | open (Y1, ">$T1") ; 34 | 35 | foreach $a (0..$maxa) { 36 | $wa = $WA[$a] ; 37 | $wb = $WB[$a] ; 38 | $va = $VA[$a] ; 39 | $vb = $VB[$a] ; 40 | die "mismatched weights: line index $a\n" if ($wa != $wb) ; 41 | printf Y1 "%d %d %12.6f\n", $a, $wa, $va - $vb ; 42 | } 43 | close Y1 ; 44 | system "mkpretty $T1 $fout" if (defined $fout) ; 45 | printf "##jackknife difference $fa - $fb\n" ; 46 | system "simpjack2 -i $T1 -v" ; 47 | -------------------------------------------------------------------------------- /perlsrc/mkpretty: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ## mkpretty infile outfile 4 | 5 | ($infile, $outfile) = @ARGV ; 6 | $T1 = "/tmp/t1.$$" ; 7 | $T2 = "/tmp/t2.$$" ; 8 | 9 | if (defined $infile) { 10 | die "Usage: mkpretty < infile > outfile OR mkpretty infile outfile\n" unless (defined $outfile) ; 11 | system "cp $infile $T1" ; 12 | } 13 | else { 14 | system "cat - > $T1" ; 15 | } 16 | open (F1, $T1) || die "badly\n" ; 17 | for (;;) { 18 | last if (eof(F1)) ; 19 | $line = ; 20 | chomp $line ; 21 | (@Z) = split " ", $line ; 22 | next unless 
(defined $Z[0]) ; 23 | next if ($Z[0] =~ /\#/) ; 24 | $n = @Z ; 25 | foreach $x (0..$n-1) { 26 | $W[$x] = 0 unless (defined ($W[$x])) ; 27 | $w = length($Z[$x]) ; 28 | $W[$x] = $w if ($w > $W[$x]) ; 29 | } 30 | } 31 | ## @W is maximum width 32 | close F1 ; 33 | $nw = @W ; 34 | foreach $x (0..$nw-1) { 35 | $w = $W[$x] ; 36 | $fmt = "%" . $w . "s" ; 37 | $F[$x] = $fmt ; 38 | } 39 | 40 | open (F1, $T1) || die "badly\n" ; 41 | open (YY, ">$T2") || die "very badly\n" ; 42 | for (;;) { 43 | last if (eof(F1)) ; 44 | $line = ; 45 | chomp $line ; 46 | (@Z) = split " ", $line ; 47 | next unless (defined $Z[0]) ; 48 | if ($Z[0] =~ /\#/) { 49 | print YY "$line\n"; 50 | next ; 51 | } 52 | $n = @Z ; 53 | foreach $x (0..$n-1) { 54 | printf YY "$F[$x]", $Z[$x] ; 55 | print YY " ", if ($x != ($n-1)) ; 56 | } 57 | print YY "\n" ; 58 | } 59 | if (defined $outfile) { 60 | system "cp $T2 $outfile" ; 61 | } 62 | else { 63 | system "cat $T2" ; 64 | } 65 | unlink ($T1, $T2) ; 66 | 67 | -------------------------------------------------------------------------------- /perlsrc/numlines: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ($IN) = @ARGV ; 4 | $xx = `wc --lines $IN` ; 5 | ($a) = split " ", $xx ; 6 | print "$a\n" ; 7 | -------------------------------------------------------------------------------- /perlsrc/xtractcol: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ($COLS, $xflag) = @ARGV ; 4 | 5 | die "no columns\n" unless (defined $COLS) ; 6 | if ($COLS =~ /:/) { 7 | @CC = split ":", $COLS ; 8 | } 9 | else { 10 | $CC[0] = $COLS ; 11 | } 12 | 13 | for (;;) { 14 | last if (eof(STDIN)) ; 15 | $line = ; 16 | chomp $line ; 17 | @XX = split " ", $line ; 18 | if (!defined $XX[0]) {print "$line\n" if ($xflag); next; } ## skip blanks 19 | $f = $XX[0] ; 20 | if ($f =~ /\#/) {print "$line\n" if ($xflag); next; } 21 | $ncc = @CC ; 22 | foreach $x (1..$ncc) { 23 | $nn 
= $CC[$x-1] ; 24 | $n = $nn ; 25 | $xx = @XX ; 26 | $n = $xx + $nn if ($nn<0) ; ## -1 is last column 27 | die "bad col $n :: $line\n" unless (defined $XX[$n]) ; 28 | print "$XX[$n]" ; 29 | print " " unless ($x == $ncc) ; 30 | } 31 | print "\n" ; 32 | } 33 | -------------------------------------------------------------------------------- /perlsrc/xtractcoltab: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ($COLS, $xflag) = @ARGV ; 4 | 5 | if ($COLS =~ /:/) { 6 | @CC = split ":", $COLS ; 7 | } 8 | else { 9 | $CC[0] = $COLS ; 10 | } 11 | 12 | for (;;) { 13 | last if (eof(STDIN)) ; 14 | $line = ; 15 | chomp $line ; 16 | @XX = split "\t", $line ; 17 | if (!defined $XX[0]) {print "$line\n" if ($xflag); next; } ## skip blanks 18 | $f = $XX[0] ; 19 | if ($f =~ /\#/) {print "$line\n" if ($xflag); next; } 20 | $num = 0 ; 21 | foreach $n (@CC) { 22 | die "bad col $n :: $line\n" unless (defined $XX[$n]) ; 23 | print "\t" unless ($num==0) ; 24 | print "$XX[$n]" ; 25 | ++$num ; 26 | } 27 | print "\n" ; 28 | } 29 | -------------------------------------------------------------------------------- /perlsrc/xtractcolv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ($COLS, $xflag) = @ARGV ; 4 | 5 | if ($COLS =~ /:/) { 6 | @CC = split ":", $COLS ; 7 | } 8 | else { 9 | $CC[0] = $COLS ; 10 | } 11 | 12 | for (;;) { 13 | last if (eof(STDIN)) ; 14 | $line = ; 15 | chomp $line ; 16 | @XX = split " ", $line ; 17 | if (!defined $XX[0]) {print "$line\n" if (defined $xflag); next; } ## skip blanks 18 | if ($XX[0] =~ /\#/) {print "$line\n" if (defined $xflag); next; } 19 | foreach $n (@CC) { 20 | die "bad col $n :: $line\n" unless (defined $XX[$n]) ; 21 | $XX[$n] = " " ; 22 | } 23 | $out = join " ", @XX ; 24 | print "$out" ; 25 | print "\n" ; 26 | } 27 | -------------------------------------------------------------------------------- /perlsrc/xtractcolx: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ($COLS, $xflag) = @ARGV ; 4 | 5 | die "no columns\n" unless (defined $COLS) ; 6 | if ($COLS =~ /:/) { 7 | @CC = split ":", $COLS ; 8 | } 9 | else { 10 | $CC[0] = $COLS ; 11 | } 12 | 13 | for (;;) { 14 | last if (eof(STDIN)) ; 15 | $line = ; 16 | chomp $line ; 17 | @XX = split " ", $line ; 18 | if (!defined $XX[0]) {print "$line\n" if ($xflag); next; } ## skip blanks 19 | $f = $XX[0] ; 20 | if ($f =~ /\#/) {print "$line\n" if ($xflag); next; } 21 | $ncc = @CC ; 22 | $bad = 0 ; 23 | foreach $x (1..$ncc) { 24 | $nn = $CC[$x-1] ; 25 | $n = $nn ; 26 | $xx = @XX ; 27 | $n = $xx + $nn if ($nn<0) ; ## -1 is last column 28 | $bad = 1 if (!defined $XX[$n]) ; 29 | } 30 | if ($bad == 1) { 31 | print STDERR "bad line: $line\n" ; 32 | next ; 33 | } 34 | foreach $x (1..$ncc) { 35 | last if ($bad == 1) ; 36 | $nn = $CC[$x-1] ; 37 | $n = $nn ; 38 | $xx = @XX ; 39 | $n = $xx + $nn if ($nn<0) ; ## -1 is last column 40 | die "bad col $n :: $line\n" unless (defined $XX[$n]) ; 41 | print "$XX[$n]" ; 42 | print " " unless ($x == $ncc) ; 43 | } 44 | print "\n" ; 45 | } 46 | -------------------------------------------------------------------------------- /qpGraph_hints.txt: -------------------------------------------------------------------------------- 1 | Hints on using qpGraph 2 | ---------------------- 3 | 1) After writing a new graph use qpreroot -g graph -d junk.dot to check parsing and that the graph is 4 | what you want. 5 | 6 | 2) optional modes 7 | a) 8 | inbreed: YES Use if some populations are pseudo-diploid and all population sizes >= 2 9 | Without this single samples (like MA1) can be used, but the edge-length leading to the leaf 10 | is not meaningful. 11 | b) 12 | useallsnps: YES 13 | if coverage is low. But this is theoretically dubious. 14 | Default is that SNPs are only used if 15 | not monomorphic and data is present for all populations. 
16 | 3) 17 | One can fix admixture proportions by specifying 18 | lock V 19 | where V is a vertex that is admixed. 20 | This is sometimes useful. 21 | 22 | 4) 23 | For very large graphs the program sometimes has trouble finding the globally best fit. 24 | 25 | 5) edge lengths of zero suggest that the graph should perhaps be modified. 26 | 27 | A -> B of length 0 suggests that maybe B should be ancestral to A 28 | 29 | ====================================================================== 30 | Biggest problem: How good is the fit? Arbitrarily good fits can be 31 | obtained by adding enough admixture events. 32 | 33 | Nick 6/10/17 34 | 35 | -------------------------------------------------------------------------------- /qpfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DReichLab/AdmixTools/b10ddcfdf43792125052c8bc8b8a96cf5335eb56/qpfs.pdf -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | backup 2 | convertf 3 | copych 4 | diff.out 5 | diffch 6 | dirlist 7 | dowtjack 8 | easycheck 9 | easylite 10 | easystats 11 | expfit.sh 12 | gcount 13 | geno_single 14 | gmon.out 15 | grabpars 16 | kimf 17 | libnick.a 18 | mergeit 19 | merge_transpose 20 | multimerge 21 | nickhash 22 | nicktmp 23 | oldmake 24 | oldmakefile 25 | q1 26 | qaz 27 | qp3Pop 28 | qp4diff 29 | qpAdm 30 | qpBound 31 | qpDpart 32 | qpdslow 33 | qpDstat 34 | qpff3base 35 | qpfmv 36 | qpfstats 37 | qpF4ratio 38 | qpGraph 39 | qpWave 40 | qpreroot 41 | rolloff 42 | rolloffp 43 | simpjack2 44 | snpunion 45 | tables 46 | transpose 47 | qpmix 48 | -------------------------------------------------------------------------------- /src/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009-2016, Broad Institute, Inc. 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 12 | * 13 | Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 18 | * 19 | Neither the name Broad Institute, Inc. nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE 33 | -------------------------------------------------------------------------------- /src/add_extern_C.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script modifies headers to allow C++ linking. 
4 | 5 | # cd rolloffdir 6 | for header_file in *.h nicksrc/*.h 7 | do 8 | echo $header_file 9 | mv $header_file $header_file.bak 10 | echo '#ifdef __cplusplus 11 | extern "C" { 12 | #endif' | cat - $header_file.bak > $header_file 13 | echo '#ifdef __cplusplus 14 | } 15 | #endif' >> $header_file 16 | done 17 | -------------------------------------------------------------------------------- /src/badpairs.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | #include 5 | #include "admutils.h" 6 | 7 | void dobadpairs(char *badpairsname, SNP **snpm, int numsnps) ; 8 | void dogood(char *goodsnpname, SNP **snpm, int numsnps) ; 9 | void getsnpsc(char *snpscname, SNP **snpm, int numsnps) ; 10 | int killsnps(Indiv **indivmarkers, SNP **snpmarkers, int numsnps, int mincasenum) ; 11 | void loadbadpsc(SNP **snpm, int numsnps, int rmode, char *gname) ; 12 | 13 | double entrop(double *a, int n) ; 14 | double xxlog2(double t) ; 15 | double mutx(double *dd) ; 16 | double mutxx(double *dd, int m , int n) ; 17 | 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | -------------------------------------------------------------------------------- /src/diffmean.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | char *iname = NULL, *jname = NULL; 7 | int xflag = NO ; 8 | int terse = NO ; 9 | 10 | int getd (char *name, double *mm, double *vv, int n) ; 11 | void readcommands(int argc, char **argv) ; 12 | 13 | int main(int argc, char **argv) 14 | { 15 | 16 | int n1, n2, n, nx ; 17 | double *m1, *var1, *m2, *var2, *dm, *dv, *dq, ychi, ytail ; 18 | 19 | if (argc == 3) { 20 | iname = strdup(argv[1]) ; 21 | jname = strdup(argv[2]) ; 22 | } 23 | 24 | else { 25 | readcommands(argc, argv) ; 26 | } 27 | if (iname==NULL) printf ("Usage: diffmean -i in1 -j in2 [-x]\n") ; 28 | if (jname==NULL) printf ("Usage: diffmean -i in1 -j 
in2 [-x]\n") ; 29 | 30 | if ((iname == NULL) || (jname == NULL)) return -1 ; 31 | n = n1 = numcols(iname) ; 32 | n2 = numcols(jname) ; 33 | if (n1 != n2) fatalx("mismatch source number\n") ; 34 | ZALLOC(m1, n, double) ; 35 | ZALLOC(m2, n, double) ; 36 | ZALLOC(var1, n*n, double) ; 37 | ZALLOC(var2, n*n, double) ; 38 | 39 | nx = getd(iname, m1, var1, n) ; 40 | nx = getd(jname, m2, var2, n) ; 41 | 42 | if (terse == NO) { 43 | printmat(m1, 1, nx) ; 44 | printmat(m2, 1, nx) ; 45 | 46 | printnl() ; 47 | printmatl(var1, nx, nx) ; 48 | printmatl(var2, nx, nx) ; 49 | 50 | } 51 | 52 | ZALLOC(dm, nx, double) ; 53 | ZALLOC(dv, nx*nx, double) ; 54 | ZALLOC(dq, nx*nx, double) ; 55 | 56 | vvm(dm, m1, m2, nx) ; 57 | vvp(dv, var1, var2, nx*nx) ; 58 | pdinv(dq, dv, nx) ; 59 | 60 | ychi = scx(dq, NULL, dm, nx) ; 61 | ytail = rtlchsq(nx, ychi) ; 62 | 63 | if (terse == NO) { 64 | printf("%15s ", iname) ; printmat(m1, 1, nx) ; 65 | printf("%15s ", jname) ; printmat(m2, 1, nx) ; 66 | printf("%15s ", "diff") ; printmat(dm, 1, nx) ; 67 | printf("result: dof: %d chisq: %9.3f tail: %12.6f\n", nx, ychi, ytail) ; 68 | } 69 | 70 | else { 71 | printf(" %9.3f ", m1[0]) ; 72 | printf(" %9.3f ", m2[0]) ; 73 | printmatx(dm, 1, nx) ; 74 | printf(" %12.6f", ytail) ; 75 | printnl() ; 76 | } 77 | 78 | return 0; 79 | 80 | } 81 | 82 | int getd (char *name, double *mm, double *vv, int n) 83 | { 84 | double **xx ; 85 | int j, k, kk ; 86 | int nx ; 87 | 88 | xx = initarray_2Ddouble(n+1, n+1, 0) ; 89 | nx = n-1 ; 90 | if (xflag) ++nx ; 91 | 92 | getxx(xx, n+1, n, name) ; 93 | for (j=0; j 2 | #include 3 | #include 4 | #include 5 | 6 | #define MAXSTR 512 7 | #define MAXFF 20 8 | 9 | char *iname = NULL ; 10 | char *oname = NULL ; 11 | double mean; 12 | 13 | int terse = NO ; 14 | 15 | void readcommands(int argc, char **argv) ; 16 | void callwtjack(char *iname, char *oname); 17 | 18 | int main(int argc, char **argv) 19 | { 20 | readcommands(argc, argv) ; 21 | callwtjack(iname, oname); 22 | } 23 | 24 | void 
readcommands(int argc, char **argv) 25 | { 26 | int i; 27 | 28 | while ((i = getopt (argc, argv, "i:o:m:t:")) != -1) 29 | { 30 | switch (i) 31 | { 32 | 33 | case 'i': 34 | iname = strdup(optarg) ; 35 | break; 36 | 37 | case 'o': 38 | oname = strdup(optarg) ; 39 | break; 40 | 41 | case 'm': 42 | mean = atof(optarg) ; 43 | break; 44 | 45 | case 't': 46 | terse = NO ; 47 | break; 48 | 49 | case '?': 50 | printf ("Usage: bad params.... \n") ; 51 | fatalx("bad params\n") ; 52 | } 53 | } 54 | } 55 | 56 | 57 | void 58 | callwtjack(char *iname, char *oname) 59 | { 60 | FILE *ifile, *ofile ; 61 | char line[MAXSTR]; 62 | char *sx; 63 | char *spt[MAXFF]; 64 | int nsplit, len, i, k; 65 | char c; 66 | 67 | openit(iname, &ifile, "r") ; 68 | openit(oname, &ofile, "w") ; 69 | 70 | double *jwt, *jmean; 71 | 72 | /* output variable */ 73 | double est, sig; // NB 74 | est =0; sig=0; 75 | 76 | /*input variables */ 77 | len = numlines(iname); 78 | ZALLOC(jwt, len, double); 79 | ZALLOC(jmean, len, double); 80 | k = 0; 81 | 82 | /* read input file and store data */ 83 | while (fgets(line,MAXSTR,ifile) != NULL) 84 | { 85 | nsplit = splitup(line, spt, MAXFF); 86 | sx = spt[0]; 87 | c = sx[0]; 88 | if (c == '#') { 89 | freeup(spt, nsplit); 90 | continue; 91 | } 92 | 93 | jwt[k] = atof(spt[1]); 94 | jmean[k] = atof(spt[2]); 95 | //printf("mean: %9.3f len: %9.3f\n", jmean[k], jwt[k]) ; 96 | k++; 97 | freeup(spt, nsplit); 98 | } 99 | len = k ; // better style who knows how numlines handles commas 100 | fclose(ifile) ; 101 | // printf("mean: %9.3f len: %d\n", mean, len) ; 102 | 103 | 104 | /*call weightjack */ 105 | weightjack(&est, &sig, mean, jmean, jwt, len); 106 | if (terse) { 107 | fprintf(ofile,"%9.3f ", est); // d format ?? 108 | fprintf(ofile,"%9.3f", sig); 109 | fprintf(ofile,"\n"); 110 | } 111 | else { 112 | fprintf(ofile,"mean: %9.3f ", est); // d format ?? 
113 | fprintf(ofile,"std error: %9.3f ", sig); 114 | fprintf(ofile,"Z: %9.3f", est/sig); 115 | fprintf(ofile,"\n"); 116 | } 117 | free(jmean); 118 | free(jwt); 119 | fclose(ofile) ; 120 | 121 | } 122 | -------------------------------------------------------------------------------- /src/egsubs.c: -------------------------------------------------------------------------------- 1 | #include "mcio.h" 2 | #include "egsubs.h" 3 | 4 | 5 | int 6 | makeeglist (char **eglist, int maxnumeg, Indiv ** indivmarkers, int numindivs) 7 | // old routine mkeglist 8 | { 9 | 10 | Indiv *indx; 11 | int i, k, numeg = 0; 12 | for (i = 0; i < numindivs; i++) { 13 | indx = indivmarkers[i]; 14 | if (indx->ignore) 15 | continue; 16 | k = indxindex (eglist, numeg, indx->egroup); 17 | if (k < 0) { 18 | if (numeg >= maxnumeg) { 19 | printf 20 | ("number of populations too large. Increase maxpops if you wish\n"); 21 | fatalx 22 | ("(makeeglist) You really want to analyse more than %d populations?\n", 23 | maxnumeg); 24 | } 25 | eglist[numeg] = strdup (indx->egroup); 26 | ++numeg; 27 | } 28 | } 29 | return numeg; 30 | } 31 | 32 | int 33 | mkeglist (Indiv ** indm, int numindivs, char **eglist) 34 | { 35 | Indiv *indx; 36 | int i, k, numeg = 0; 37 | for (i = 0; i < numindivs; i++) { 38 | indx = indm[i]; 39 | if (indx->ignore) 40 | continue; 41 | k = indxindex (eglist, numeg, indx->egroup); 42 | if (k < 0) { 43 | eglist[numeg] = strdup (indx->egroup); 44 | ++numeg; 45 | } 46 | } 47 | return numeg; 48 | } 49 | 50 | int 51 | loadlist_type (char **list, char *listname, int *ztypes, int off) 52 | // listname is just a list of names ... 
53 | { 54 | FILE *lfile; 55 | char line[MAXSTR]; 56 | char *spt[MAXFF]; 57 | char *sx; 58 | Indiv *indx; 59 | int nsplit, i, n = 0, tt; 60 | 61 | if (listname == NULL) 62 | return 0; 63 | openit (listname, &lfile, "r"); 64 | while (fgets (line, MAXSTR, lfile) != NULL) { 65 | nsplit = splitup (line, spt, MAXFF); 66 | if (nsplit == 0) 67 | continue; 68 | sx = spt[0]; 69 | if (sx[0] == '#') { 70 | freeup (spt, nsplit); 71 | continue; 72 | } 73 | if (nsplit < 2) 74 | fatalx ("bad listname: %s\n", sx); 75 | list[n] = strdup (sx); 76 | tt = atoi (spt[1]); 77 | ztypes[n] = tt + off; 78 | ++n; 79 | freeup (spt, nsplit); 80 | } 81 | return n; 82 | } 83 | 84 | 85 | void 86 | seteglist (Indiv ** indm, int nindiv, char *eglistname) 87 | { 88 | FILE *egfile; 89 | char line[MAXSTR]; 90 | char *spt[MAXFF]; 91 | char *sx; 92 | Indiv *indx; 93 | int nsplit, i; 94 | 95 | if (eglistname == NULL) 96 | return; 97 | openit (eglistname, &egfile, "r"); 98 | while (fgets (line, MAXSTR, egfile) != NULL) { 99 | nsplit = splitup (line, spt, MAXFF); 100 | if (nsplit == 0) 101 | continue; 102 | sx = spt[0]; 103 | if (sx[0] == '#') 104 | continue; 105 | setstatus (indm, nindiv, sx); 106 | freeup (spt, nsplit); 107 | } 108 | fclose (egfile); 109 | } 110 | 111 | void 112 | seteglistv (Indiv ** indm, int nindiv, char *eglistname, int val) 113 | { 114 | FILE *egfile; 115 | char line[MAXSTR]; 116 | char *spt[MAXFF]; 117 | char *sx = NULL; 118 | Indiv *indx; 119 | int nsplit, i; 120 | 121 | if (eglistname == NULL) { 122 | setstatusv (indm, nindiv, NULL, val); 123 | } 124 | 125 | openit (eglistname, &egfile, "r"); 126 | while (fgets (line, MAXSTR, egfile) != NULL) { 127 | nsplit = splitup (line, spt, MAXFF); 128 | if (nsplit == 0) 129 | continue; 130 | sx = spt[0]; 131 | if (sx[0] == '#') 132 | continue; 133 | setstatusv (indm, nindiv, sx, val); 134 | freeup (spt, nsplit); 135 | } 136 | fclose (egfile); 137 | } 138 | -------------------------------------------------------------------------------- 
/src/egsubs.h: -------------------------------------------------------------------------------- 1 | #include "admutils.h" 2 | 3 | 4 | int makeeglist(char **eglist, int maxnumeg, Indiv **indivmarkers, int numindivs) ; 5 | int mkeglist(Indiv **indm, int numindivs, char **eglist) ; 6 | void seteglist(Indiv **indm, int nindiv, char *eglistname) ; 7 | void seteglistv(Indiv **indm, int nindiv, char *eglistname, int val) ; 8 | int loadlist(char **list, char *listname) ; 9 | int loadlist_type(char **list, char *listname, int *ztypes, int off) ; 10 | -------------------------------------------------------------------------------- /src/eigsubs.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void eigvals(double *mat, double *evals, int n) ; 7 | void eigvecs(double *mat, double *evals, double *evecs, int n) ; 8 | double logdet(double *mat, int n) ; 9 | void eigb(double *lam, double *a, double *b, int n) ; 10 | void eigc(double *lam, double *a, double *b, int n) ; 11 | double twestxx(double *lam, int m, double *pzn, double *pzvar) ; 12 | void mkorth(double *orth, double *ww, int n) ; 13 | 14 | typedef struct { 15 | int vecno ; 16 | double score ; 17 | } OUTLINFO ;; 18 | 19 | -------------------------------------------------------------------------------- /src/exclude.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define MAXRGN 1000 6 | 7 | void 8 | excluderegions (char *xregionname, SNP ** snps, int nsnps, 9 | char *deletesnpoutname) 10 | { 11 | FILE *fp; 12 | 13 | int chr[MAXRGN]; 14 | int lo[MAXRGN]; 15 | int hi[MAXRGN]; 16 | 17 | char line[MAXSTR]; 18 | char *spt[MAXFF]; 19 | int nsplit, nrgn, i, j; 20 | 21 | if ((fp = fopen (xregionname, "r")) == NULL) { 22 | printf ("excluderegions: can't open file %s\n", xregionname); 23 | return; 24 | } 25 | 26 | for (i = 0; i < MAXRGN; i++) { 27 | 28 | if 
(fgets (line, MAXSTR, fp) == NULL) 29 | break; 30 | 31 | nsplit = splitup (line, spt, MAXFF); 32 | if (nsplit != 3) 33 | continue; 34 | 35 | chr[i] = atoi (spt[0]); 36 | lo[i] = atoi (spt[1]); 37 | hi[i] = atoi (spt[2]); 38 | 39 | } 40 | fclose (fp); 41 | nrgn = i; 42 | 43 | for (i = 0; i < nsnps; i++) { 44 | SNP *cupt = snps[i]; 45 | for (j = 0; j < nrgn; j++) { 46 | if (cupt->chrom == chr[j] && cupt->physpos >= lo[j] 47 | && cupt->physpos <= hi[j]) { 48 | cupt->ignore = YES; 49 | if (deletesnpoutname != NULL) { 50 | logdeletedsnp (cupt->ID, "xregion", deletesnpoutname); 51 | } 52 | } 53 | } 54 | } 55 | 56 | return; 57 | 58 | } 59 | 60 | void 61 | hwfilter (SNP ** snps, int nsnps, int nindiv, double nhwfilter, 62 | char *deletesnpoutname) 63 | { 64 | 65 | int i, k; 66 | 67 | for (i = 0; i < nsnps; i++) { 68 | int num = 0, den = 0, het = 0, n0 = 0, n1 = 0, n2 = 0, nsamples; 69 | double p, Q, stdv; 70 | SNP *cupt = snps[i]; 71 | 72 | for (k = 0; k < nindiv; k++) { 73 | int g = getgtypes (cupt, k); 74 | if (g >= 0) { 75 | num += g; 76 | den += 2; 77 | } 78 | if (g == 1) { 79 | het++; 80 | n1++; 81 | } 82 | else if (g == 0) { 83 | n0++; 84 | } 85 | else if (g == 2) { 86 | n2++; 87 | } 88 | } 89 | 90 | if ((nsamples = den / 2) == 0) 91 | continue; 92 | p = (double) num / den; 93 | Q = 2 * p * (1 - p); 94 | stdv = sqrt (Q * (1 - Q) / nsamples); 95 | if (fabs ((double) het / nsamples - Q) > nhwfilter * stdv) { 96 | printf ("SNP %s removed by Hardy-Weinberg filter\n", cupt->ID); 97 | cupt->ignore = YES; 98 | if (deletesnpoutname != NULL) { 99 | logdeletedsnp (cupt->ID, "hwfilt", deletesnpoutname); 100 | } 101 | } 102 | } 103 | 104 | 105 | } 106 | -------------------------------------------------------------------------------- /src/exclude.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | #ifndef _EXCLUDE_ 5 | #define _EXCLUDE_ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | /* 
file name parameter : xregionname */ 12 | /* HW filter parameter : nhwfilter (-1 means no-filter) */ 13 | /* maximum number of regions : 1000 14 | closed intervals in physical position include endpoints */ 15 | /* read file and set ignore flag for SNPs */ 16 | void excluderegions(char *xregionname, SNP **snps, int nsnps, char *deletesnpoutname); 17 | void hwfilter(SNP **snps, int nsnps, int nindiv, double nhwfilter, char *deletesnpoutname); 18 | 19 | #endif 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | -------------------------------------------------------------------------------- /src/expfit.templ: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | parfile=$1 4 | BIN=BBBIN 5 | 6 | if [ $# -lt 1 ]; then 7 | echo -e "USAGE: expfit.sh parfile" 8 | exit 1 9 | fi 10 | BOOL_T=1 11 | BOOL_F=0 12 | 13 | function verify_file 14 | { 15 | if [ ! -s $1 ]; then 16 | echo "ERROR: $1 does not exist or is empty" 17 | return $BOOL_F 18 | fi 19 | return $BOOL_T 20 | } 21 | 22 | verify_file $parfile 23 | if [ $? == $BOOL_F ]; then exit 1; fi 24 | 25 | 26 | echo -e "Running rexpfit.r " 27 | input=`cat ${parfile} | grep output: | awk '{print $2}'` 28 | echo -e "Input file: " ${input} 29 | 30 | output="expfit_${input}" 31 | echo -e "Output file: " ${output} 32 | 33 | # Set defaults 34 | output_col=4 # data column containing weighted correlation 35 | lval=0.5 # start fit at lval genetic distance. 
Distances hval ignored 37 | affine=TRUE # affine = TRUE/FALSE 38 | plot=FALSE # plot the output = TRUE/FALSE 39 | jackknife=FALSE #jackknife = TRUE/FALSE 40 | 41 | # specified in parfile 42 | jackknife=`cat ${parfile} | grep jackknife: | awk '{print $2}'` 43 | 44 | # Fit exponential using R 45 | echo -e "Expfit logfile:" ${output}".flog" 46 | plot="TRUE" 47 | echo -e "Output plot:" ${output}".pdf" 48 | Rscript $BIN/rexpfit.r ${input} ${output} ${output_col} ${lval} ${hval} ${affine} ${plot} "FALSE" 49 | 50 | if [ "${jackknife}" != 'NO' ]; then 51 | # one chromosome removed 52 | jOutput="expfit_${input}" 53 | echo -e "Jackknife logfile:" ${jOutput}".log" 54 | echo -e "Jackknife Results: \n" > ${jOutput}.log 55 | plot="FALSE" 56 | 57 | for chr in {1..22} 58 | do 59 | inputChr="${input}:${chr}" 60 | Rscript $BIN/rexpfit.r ${inputChr} ${jOutput} ${output_col} ${lval} ${hval} ${affine} ${plot} "TRUE" 61 | done 62 | 63 | snpname=`$BIN/grabpars -p $parfile -x "snpname:" ` 64 | perl $BIN/wtjack.pl -i ${input} -m ${snpname} -b ${BIN} 65 | fi 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /src/f4rank.h: -------------------------------------------------------------------------------- 1 | typedef struct { 2 | int nl ; 3 | int nr ; 4 | char *lbase ; 5 | char *rbase ; 6 | char **lpops ; 7 | char **rpops ; 8 | int rank ; 9 | double dofjack ; 10 | double dof ; 11 | double dofdiff ; 12 | double chisq ; 13 | double chisqdiff ; 14 | double *A ; 15 | double *B ; 16 | double *mean ; 17 | double *resid ; 18 | struct F4INFO *bestparent ; 19 | struct F4INFO *bestchild ; 20 | } F4INFO ; 21 | // dofjack is dof from jackknife blocks 22 | // dof is dof from approx chisq 23 | 24 | 25 | void doranktest(double *mean, double *var, int m, int n, int rank, F4INFO *f4pt) ; 26 | void doranktestfix(double *mean, double *var, int m, int n, int rank, F4INFO *f4pt, int *vfix) ; 27 | double ranktest(double *mean, double *var, int m, int n, int rank, 
double *pA, double *pB) ; 28 | double ranktestfix(double *mean, double *var, int m, int n, int rank, double *pA, double *pB, int *vfix) ; 29 | void normab(double *A, double *B, int m, int n, int rank) ; 30 | int dofrank(int m, int n, int rank) ; 31 | void f4info_init(F4INFO *f4pt, int nl, int nr, char **popllist, char **poprlist, int rank) ; 32 | void printf4info(F4INFO *f4pt) ; 33 | int checkmv(double *mean, double *var, int m, int n) ; 34 | 35 | 36 | -------------------------------------------------------------------------------- /src/faidx.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008 Genome Research Ltd (GRL). 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | /* Contact: Heng Li */ 27 | 28 | #ifndef FAIDX_H 29 | #define FAIDX_H 30 | 31 | /*! 32 | @header 33 | 34 | Index FASTA files and extract subsequence. 
35 | 36 | @copyright The Wellcome Trust Sanger Institute. 37 | */ 38 | 39 | struct __faidx_t; 40 | typedef struct __faidx_t faidx_t; 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | /*! 47 | @abstract Build index for a FASTA or razip compressed FASTA file. 48 | @param fn FASTA file name 49 | @discussion File "fn.fai" will be generated. 50 | */ 51 | 52 | int fai_build(const char *fn) ; 53 | int fai_getlen(const faidx_t *fai, const char *name) ; 54 | 55 | /*! 56 | @abstract Distroy a faidx_t struct. 57 | @param fai Pointer to the struct to be destroyed 58 | */ 59 | void fai_destroy(faidx_t *fai); 60 | 61 | /*! 62 | @abstract Load index from "fn.fai". 63 | @param fn File name of the FASTA file 64 | */ 65 | faidx_t *fai_load(const char *fn); 66 | 67 | /*! 68 | @abstract Fetch the sequence in a region. 69 | @param fai Pointer to the faidx_t struct 70 | @param reg Region in the format "chr2:20,000-30,000" 71 | @param len Length of the region 72 | @return Pointer to the sequence; null on failure 73 | 74 | @discussion The returned sequence is allocated by malloc family 75 | and should be destroyed by end users by calling free() on it. 
76 | */ 77 | char *fai_fetch(const faidx_t *fai, const char *reg, int *len); 78 | 79 | #ifdef __cplusplus 80 | } 81 | #endif 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /src/fitsubs.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | int iscanon(int a, int b, int c, int d) ; 5 | void getmv (int a, int b, int c, int d, double *mean, double *var, double *ff3, double *ff3var, int dimff3) ; 6 | void bumpm(double *y, int a, int c, double val, double *ff3, int dim); 7 | void bumpw(double *w, int a, int c, double val, int dim); 8 | 9 | void fitclear() ; 10 | void fitsetup(double *ff3, double *ff3var, char **fglist, int numfg, char **elist, int numee) ; 11 | double fitgr(double *elen, double **vmix, int *lmix, int *pnpedge, int *pnmix, int numb, double zinc) ; 12 | void scorx(double *pscor, double *pychi) ; 13 | double scorppchi(double *ppwts, double *elen, double *pchi) ; 14 | 15 | 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | -------------------------------------------------------------------------------- /src/fourier.c: -------------------------------------------------------------------------------- 1 | void weightjackfourier(double *est,double *sig,double mean,double *kmean,double *jwt,int g,double* prho) 2 | { 3 | 4 | double mp,mpr,rhonr,rhodr,rho,mdr,S,cS,jpmean; 5 | int k,i,l; 6 | double *jst, *d,*c,*jp,*wt, *wk, *qq; 7 | double y, y1, y2, ymx, ycx, yg, *jmean, gmean ; 8 | 9 | 10 | double pi,tmean; 11 | 12 | pi = 2.0*acos(0.0) ; 13 | yg = (double) g ; 14 | 15 | ZALLOC(jst,g+1,double); 16 | ZALLOC(d,g,double); 17 | ZALLOC(c,2*g,double); 18 | ZALLOC(jmean,2*g,double); 19 | ZALLOC(jp,g,double); 20 | ZALLOC(wt,g+1,double); 21 | 22 | copyarr(kmean, jmean, g) ; 23 | copyarr(kmean, jmean+g, g) ; 24 | copyarr(jwt, wt, g) ; 25 | wt[g] = wt[0] ; 26 | 27 | /**Calculate the mean of the jackknifed means*/ 28 | mpr = asum(jmean, 
g) ; 29 | mp = mpr / yg ; 30 | 31 | gmean = mean ; 32 | 33 | if (debug) printf("mp: %12.6f mean: %12.6f\n", mp, mean) ; 34 | 35 | vsp(jst, jmean, -mp, g) ; 36 | 37 | jst[g] = jst[0]; 38 | 39 | rho = corr(jst, jst+1, g) ; 40 | 41 | if (debug) printf("rho: %12.6f g:: %d\n", rho, g) ; 42 | if (verbose) { 43 | for (k=0; kignore) 60 | continue; 61 | wt = cupt->weight; 62 | if (wt <= 0.0) 63 | continue; 64 | bnum = cupt->tagnumber; 65 | if (bnum < 0) continue; 66 | if (bnum>=nblocks) fatalx("logic bug\n") ; 67 | 68 | loadaa (cupt, xindex, xtypes, nrows, numeg); 69 | 70 | ++wjack[bnum]; 71 | ++ngood ; 72 | 73 | top = btop[bnum]; 74 | bot = bbot[bnum]; 75 | 76 | for (k=0; k") unless (defined $parname) ; 16 | 17 | $dir = `$MM/mktmp ddd 71` ; 18 | $tname = "tname.$$" ; 19 | $ttname = "ttname.$$" ; 20 | $pwd = `pwd` ; 21 | 22 | chomp ($pwd, $dir, $tname) ; 23 | 24 | $instem = `$MB/grabpars -p $parname -x "instem:" ` ; 25 | $outstem = `$MB/grabpars -p $parname -x "outstem:" ` ; 26 | $poplist = `$MB/grabpars -p $parname -x "poplistname:" ` ; 27 | $tempdir = `$MB/grabpars -p $parname -x "tempdir:" ` ; 28 | $indlist = `$MB/grabpars -p $parname -x "indivlistname:" ` ; 29 | 30 | chomp $tempdir if (defined $tempdir) ; 31 | undef ($tempdir) if ($tempdir =~ /NOTFOUND/) ; 32 | $dir = $tempdir if (defined $tempdir) ; 33 | printf "mkdir -p $dir\n" ; 34 | mkdir $dir ; 35 | ##system "mkdir -p $dir" ; 36 | 37 | chomp ($instem, $outstem, $poplist, $indlist) ; 38 | 39 | 40 | $ilist = "fxt:$$.ind" ; 41 | $INDLIST = "$dir/$ilist" ; 42 | 43 | if ($indlist =~ /NOTFOUND/) { 44 | die "no poplist or indlist\n" if ($poplist =~ /NOTFOUND/) ; 45 | system "$MM/grepcolf 2 $poplist < $instem.ind | $MM/xtractcol 0 > $INDLIST" ; 46 | } 47 | else { 48 | system "$MM/grepcolf 0 $indlist < $instem.ind | $MM/xtractcol 0 > $INDLIST" ; 49 | } 50 | 51 | $S = "$X/extract_individuals.py" ; 52 | $SG = "$X/extract_individuals.py -t" ; 53 | $M = "$X/merge_transpose.py" ; 54 | 55 | open (I1, $INDLIST) || die ("bad 
$INDLIST\n") ; 56 | 57 | $SX = $S ; 58 | $tgfile = "$instem.tgeno" ; 59 | $SX = $SG if (-e $tgfile) ; 60 | print "$tgfile\n$SX\n" ; 61 | 62 | if (-e $tgfile) { 63 | $gfile = $tgfile ; 64 | } 65 | else { 66 | $gfile = "$instem.geno" ; 67 | } 68 | 69 | open (BB, $gfile) || die "can't find $SX\n" ; 70 | read(BB, $chk, 4) ; 71 | close BB ; 72 | 73 | ##print "check: $SX $chk\n" ; 74 | if (($chk ne "GENO") && ($chk ne "TGEN")) { 75 | die "only tgeno and geno formats supported\n" ; 76 | } 77 | 78 | 79 | 80 | system "cp $instem.snp $dir/$tname.snp" ; 81 | foreach $line () { 82 | ($id) = split " ", $line ; 83 | ## system "python3 $S -i $instem -s $id -o $dir/$id >& /dev/null" ; 84 | ## system "$MM/grepcol 0 $id < $smallind > $dir.$id.ind" ; 85 | system "python3 $SX -x -i $instem -o $dir/$id $id" if (defined $verbose); 86 | system "python3 $SX -x -i $instem -o $dir/$id $id >& /dev/null" unless (defined $verbose); 87 | print "$id extracted\n" ; 88 | } 89 | 90 | close I1 ; 91 | 92 | open (I1, $INDLIST) || die ("bad $INDLIST\n") ; 93 | 94 | chdir $dir ; 95 | foreach $line () { 96 | ($id) = split " ", $line ; 97 | system "ln -f $tname.snp $id.snp" ; 98 | } 99 | 100 | system "python3 $M -f $ilist -o $ttname" ; 101 | ##print "got here\n" ; 102 | chdir $pwd ; 103 | 104 | system " cp $dir/$ttname.snp $outstem.snp" ; 105 | system " cp $dir/$ttname.ind $outstem.ind" ; 106 | system " cp $dir/$ttname.geno $outstem.geno" ; 107 | 108 | unlink $dir unless (defined $tempdir) ; 109 | -------------------------------------------------------------------------------- /src/geno.h: -------------------------------------------------------------------------------- 1 | #ifndef GENO_PACKING_H 2 | #define GENO_PACKING_H 3 | 4 | #include 5 | 6 | #define GENO_HEADER_SIZE 48 7 | 8 | // This is the position of a 2 bit unit within the byte array 9 | typedef struct{ 10 | size_t byte_index; // offset to find byte 11 | int bit_shift; // bits needed to shift [0, 2, 4, 6] 12 | } bit_array_position; 13 | 14 | 15 | 
typedef struct genotype_matrix_t genotype_matrix; // forward declaration to enable function pointer 16 | /* 17 | * This a simple 2-dimensional data structure for storing genotypes by sample and SNP. 18 | * Each genotype is 2 bits. 19 | * 00 20 | * 01 heterozygous 21 | * 10 22 | * 11 no data 23 | * 24 | * genotypes can be stored in one of two formats 25 | * 1. One row = one SNP, all samples. This is the packed ancestry format, with variable length header depending on the row size (number of samples), with minumum row size 48 bytes. Each row is byte-aligned. Due to this design, a text eigenstrat file will be smaller for <48 samples. 26 | * 2. One row = one sample, all SNPs. This is the transpose packed format, with fixed length header of 48 bytes. This is the newer format, and is much faster for merging because the byte-aligned sample rows can simply be concatenated. 27 | * 28 | * The genotype matrix may be 29 | * 1. memory mapped reading from file 30 | * 2. allocated starting in memory 31 | */ 32 | typedef struct genotype_matrix_t{ 33 | // normal has SNP rows and sample columns 34 | // transpose has sample rows and SNP columns 35 | bool transpose; 36 | size_t num_rows; 37 | size_t num_columns; 38 | size_t row_size_bytes; // to keep rows byte-aligned 39 | 40 | void (*get_position)(const genotype_matrix*, size_t sample, size_t snp, bit_array_position *); 41 | 42 | int individual_file_hash; 43 | int snp_file_hash; 44 | 45 | char *memory_map_start; 46 | size_t memory_map_length; 47 | 48 | unsigned char *genotypes; 49 | } genotype_matrix; 50 | 51 | void genotype_matrix_initialize(genotype_matrix *this); 52 | // When not loading from a file, this allocates the genotype array in memory 53 | void genotype_matrix_allocate(genotype_matrix *this, size_t num_samples, size_t num_snps, bool transpose); 54 | void genotype_matrix_destructor(genotype_matrix *this); 55 | 56 | // write the string for this matrix's header to the destination char buffer 57 | void 
genotype_matrix_header(const genotype_matrix *this, bool transpose, char *dest); 58 | 59 | // transpose: true - write out a transpose_packed file 60 | // transpose: false - write out a packed ancestry map file 61 | int genotype_matrix_write_packed_file(const genotype_matrix *this, const char *filename, bool transpose); 62 | 63 | // helper functions to locate bit array position 64 | void row_position(size_t offset, bit_array_position *position); 65 | void normal_position(const genotype_matrix *this, size_t sample, size_t snp, bit_array_position *position); 66 | void transpose_position(const genotype_matrix *this, size_t sample, size_t snp, bit_array_position *position); 67 | 68 | // Read a genotype matrix from file, either in the packed ancestry map or transpose packed format 69 | // Optionally, check the individual and snp file hashes against the header 70 | // the current implementation will not write changes in the matrix back out to the file 71 | int genotype_matrix_read_file(genotype_matrix *this, const char * const genotype_filename, int flags, const int *individual_file_hash, const int *snp_file_hash); 72 | // Read genotpye file as above in genotype_matrix_read_file, and place contents into memory 73 | int genotype_matrix_read_file_full(genotype_matrix *this, const char * const genotype_filename, int flags, const int *individual_file_hash, const int *snp_file_hash); 74 | 75 | int genotype_matrix_get_snp_hash(const genotype_matrix *this); 76 | void genotype_matrix_set_snp_hash(genotype_matrix *this, int hash); 77 | int genotype_matrix_get_individual_hash(const genotype_matrix *this); 78 | void genotype_matrix_set_individual_hash(genotype_matrix *this, int hash); 79 | 80 | // size of the byte-aligned genotype data, not including header 81 | size_t genotype_matrix_byte_size(const genotype_matrix *this); 82 | // size of the matrix 83 | size_t genotype_matrix_num_snps(const genotype_matrix *this); 84 | size_t genotype_matrix_num_samples(const genotype_matrix 
*this); 85 | 86 | unsigned char genotype_matrix_get_genotype_internal(const genotype_matrix *this, const bit_array_position *position); 87 | // simple accessor for 2 bit genotype data for a sample at particular SNP 88 | unsigned char genotype_matrix_get_genotype(const genotype_matrix *this, size_t sample, size_t snp); 89 | 90 | void array_set_genotype(unsigned char *genotypes, size_t offset, char value); 91 | // set a genotype for the requested sample and snp 92 | // negative values are set to 0x3 93 | void genotype_matrix_set_genotype(genotype_matrix *this, size_t sample, size_t snp, char value); 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /src/geno_single.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "geno.h" 5 | 6 | // extract a single individual's genotypes from a packed genotype file 7 | int main(int argc, char **argv){ 8 | genotype_matrix geno; 9 | genotype_matrix selected; 10 | 11 | //printf("reading %s\n", argv[1]); 12 | size_t ind = strtoull(argv[2], NULL, 10); 13 | char *output_filename = argv[3]; 14 | 15 | genotype_matrix_initialize(&geno); 16 | int result = genotype_matrix_read_file(&geno, argv[1], 0, NULL, NULL); 17 | if (result < 0){ 18 | printf("Error reading %s: %d\n", argv[1], result); 19 | return result; 20 | } 21 | 22 | size_t num_snps = genotype_matrix_num_snps(&geno); 23 | size_t num_ind = genotype_matrix_num_samples(&geno); 24 | if (ind < 0 || ind >= num_ind){ 25 | fprintf(stderr, "Bad index %zu. 
There are %zu samples.", ind, num_ind); 26 | } 27 | 28 | genotype_matrix_initialize(&selected); 29 | bool transpose = true; 30 | genotype_matrix_allocate(&selected, 1, num_snps, transpose); 31 | 32 | for (size_t snp = 0; snp < num_snps; snp++){ 33 | char value = genotype_matrix_get_genotype(&geno, ind, snp); 34 | genotype_matrix_set_genotype(&selected, 0, snp, value); 35 | } 36 | int snp_hash = genotype_matrix_get_snp_hash(&geno); 37 | genotype_matrix_set_snp_hash(&selected, snp_hash); 38 | 39 | genotype_matrix_write_packed_file(&selected, output_filename, transpose); 40 | 41 | genotype_matrix_destructor(&geno); 42 | genotype_matrix_destructor(&selected); 43 | return 0; 44 | } 45 | -------------------------------------------------------------------------------- /src/globals.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | #ifndef _GLOBALS_ 5 | #define _GLOBALS_ 6 | 7 | int numchrom = 22; 8 | int fancynorm=YES, verbose=NO, plotmode=NO, outnum = -1 ; 9 | 10 | #endif 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | -------------------------------------------------------------------------------- /src/grabpars.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define MAXSTR 512 8 | #define MAXFF 20 9 | 10 | char *dir = "/home/np29/broaddata/tables/hg17"; 11 | char *iname = NULL; 12 | char *oname = NULL; 13 | 14 | int verbose = NO; 15 | char *grabp (int argc, char **argv); 16 | 17 | int 18 | main (int argc, char **argv) 19 | { 20 | 21 | char *xx; 22 | 23 | xx = grabp (argc, argv); 24 | if (xx == NULL) 25 | xx = strdup ("NOTFOUND"); 26 | printf ("%s\n", xx); 27 | if (strlen (xx) > 0) 28 | return 0; 29 | 30 | return -1; 31 | 32 | 33 | } 34 | 35 | char * 36 | grabp (int argc, char **argv) 37 | { 38 | int i; 39 | phandle *ph; 40 | char str[512]; 41 | int n, kode; 42 | char 
*parname = NULL, *xname = NULL; 43 | static char *xval = NULL; 44 | 45 | while ((i = getopt (argc, argv, "p:x:V")) != -1) { 46 | 47 | switch (i) { 48 | 49 | case 'p': 50 | parname = strdup (optarg); 51 | break; 52 | 53 | case 'x': 54 | xname = strdup (optarg); 55 | break; 56 | 57 | case 'V': 58 | verbose = YES; 59 | break; 60 | 61 | case '?': 62 | printf ("Usage: bad params.... \n"); 63 | fatalx ("bad params\n"); 64 | } 65 | } 66 | 67 | if (parname == NULL) 68 | return NULL; 69 | if (xname == NULL) 70 | return NULL; 71 | 72 | ph = openpars (parname); 73 | dostrsub (ph); 74 | 75 | getstring (ph, xname, &xval); 76 | return xval; 77 | 78 | } 79 | -------------------------------------------------------------------------------- /src/gslkim.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "qpsubs.h" 15 | 16 | #define WVERSION "100" 17 | #define MAXSTR 512 18 | 19 | extern int gsldetails; 20 | extern double gslprecision; 21 | 22 | double grscore (double *www) ; 23 | static int xnpars; 24 | static double *xvpars; 25 | static double *www; 26 | 27 | gsl_multimin_fminimizer *s = NULL; 28 | static gsl_vector *ss, *x; 29 | gsl_multimin_function minex_func; 30 | 31 | double fjunk (const gsl_vector *v, void *params); 32 | double mkcanon (double *a) ; 33 | 34 | int 35 | gslsetup (int npars, double *vpars, double initscale) 36 | { 37 | int k; 38 | const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex2; 39 | 40 | if (npars == 0) 41 | return 0; 42 | xnpars = npars; 43 | ZALLOC (xvpars, npars, double); 44 | copyarr (vpars, xvpars, npars); 45 | //www 46 | 47 | 48 | minex_func.n = npars; 49 | minex_func.f = fjunk; 50 | minex_func.params = NULL; 51 | 52 | x = gsl_vector_alloc (npars); 53 | for (k = 0; k < npars; ++k) { 54 | gsl_vector_set (x, k, vpars[k]); 55 
| } 56 | 57 | /* Set initial step sizes to initscale */ 58 | ss = gsl_vector_alloc (npars); 59 | gsl_vector_set_all (ss, initscale); 60 | 61 | /* Initialize method and iterate */ 62 | 63 | s = gsl_multimin_fminimizer_alloc (T, npars); 64 | gsl_multimin_fminimizer_set (s, &minex_func, x, ss); 65 | 66 | gsl_set_error_handler_off (); 67 | 68 | // printf ("gslsetup called\n"); 69 | return 1; 70 | } 71 | 72 | void gslfree() 73 | { 74 | 75 | gsl_vector_free(ss) ; 76 | gsl_vector_free(x) ; 77 | gsl_multimin_fminimizer_free(s) ; 78 | 79 | } 80 | 81 | 82 | double 83 | gslopt (double *wpars) 84 | { 85 | size_t iter = 0, k; 86 | int status; 87 | double size; 88 | double q = 999999; 89 | double qbest = 1.0e40; 90 | 91 | /* Starting point */ 92 | 93 | 94 | for (k = 0; k < xnpars; ++k) { 95 | gsl_vector_set (x, k, wpars[k]); 96 | } 97 | gsl_multimin_fminimizer_set (s, &minex_func, x, ss); 98 | 99 | gsl_set_error_handler_off (); 100 | 101 | do { 102 | iter++; 103 | status = gsl_multimin_fminimizer_iterate (s); 104 | 105 | if (status) 106 | break; 107 | 108 | size = gsl_multimin_fminimizer_size (s); 109 | status = gsl_multimin_test_size (size, gslprecision); 110 | q = s->fval; 111 | 112 | if (q < qbest) { 113 | qbest = q; 114 | for (k = 0; k < xnpars; ++k) { 115 | wpars[k] = gsl_vector_get (s->x, k); 116 | } 117 | if (gsldetails) { 118 | printf ("+++ new best %12.6f", qbest); 119 | printmatw(wpars, 1, xnpars, xnpars) ; 120 | printf("zzheap: %x\n", topheap()) ; 121 | } 122 | } 123 | } 124 | while (status == GSL_CONTINUE && iter < 1000); 125 | 126 | mkcanon (wpars); 127 | /** 128 | printf ("gslans: %4d %12.6f\n", (int) iter, q); 129 | printmatwl (wpars, 1, xnpars, xnpars); 130 | */ 131 | 132 | fflush (stdout); 133 | return q; 134 | } 135 | 136 | double 137 | fjunk (const gsl_vector * v, void *params) 138 | { 139 | double xx; 140 | double q; 141 | int k, t = 0; 142 | double *tt; 143 | double penalty = 0; 144 | 145 | ZALLOC (tt, xnpars, double); 146 | for (k = 0; k < xnpars; ++k) { 
147 | tt[k] = gsl_vector_get (v, k); 148 | } 149 | penalty = mkcanon (tt); 150 | 151 | q = grscore (tt); 152 | q += 10 * penalty; 153 | if (gsldetails) { 154 | printf ("zzfjunk %9.3f\n", q); 155 | printmat (tt, 1, xnpars); 156 | } 157 | free (tt); 158 | return q; 159 | } 160 | -------------------------------------------------------------------------------- /src/gslqp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include "qpsubs.h" 15 | 16 | #define WVERSION "100" 17 | #define MAXSTR 512 18 | 19 | extern int gsldetails; 20 | extern double gslprecision; 21 | 22 | double scorit (double *www, int n, double *pfix, double *ans); 23 | static int xnmix; 24 | static double *xvmix; 25 | static double *www; 26 | 27 | gsl_multimin_fminimizer *s = NULL; 28 | static gsl_vector *ss, *x; 29 | gsl_multimin_function minex_func; 30 | 31 | double fjunk (const gsl_vector *v, void *params); 32 | static double fff (double *a, int n); 33 | 34 | int 35 | gslsetup (int nmix, double *vmix) 36 | { 37 | int k; 38 | const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex2; 39 | 40 | if (nmix == 0) 41 | return 0; 42 | xnmix = nmix; 43 | ZALLOC (xvmix, nmix, double); 44 | copyarr (vmix, xvmix, nmix); 45 | ZALLOC (www, 2 * nmix, double); 46 | 47 | 48 | minex_func.n = nmix; 49 | minex_func.f = fjunk; 50 | minex_func.params = NULL; 51 | 52 | x = gsl_vector_alloc (nmix); 53 | for (k = 0; k < nmix; ++k) { 54 | gsl_vector_set (x, k, vmix[k]); 55 | } 56 | 57 | /* Set initial step sizes to 0.1 */ 58 | ss = gsl_vector_alloc (nmix); 59 | gsl_vector_set_all (ss, 0.1); 60 | 61 | /* Initialize method and iterate */ 62 | 63 | s = gsl_multimin_fminimizer_alloc (T, nmix); 64 | gsl_multimin_fminimizer_set (s, &minex_func, x, ss); 65 | 66 | gsl_set_error_handler_off (); 67 | 68 | printf 
/*
 * fff: make the first n entries of a[] canonical and return a penalty.
 *
 * Each entry outside [0,1] is folded back into that interval by reflection:
 * the sign is dropped, then the value is reduced with period 2 and, if it
 * still exceeds 1, mirrored as 2 - value.  The entry is overwritten with the
 * folded value.  Entries already inside [0,1] are left untouched.
 *
 * Returns the sum over all moved entries of (folded - original)^2.
 *
 * The loop is bounded by the parameter n.  It used to be bounded by the
 * file-level xnmix, silently ignoring n; the callers in this file pass
 * xnmix, so their behaviour is unchanged.
 */
double
fff (double *a, int n)
{
  double penalty = 0;
  double original, folded, whole, shift;
  int k;

  for (k = 0; k < n; ++k) {
    original = a[k];
    if ((original >= 0) && (original <= 1))
      continue;

    folded = original;
    if (folded < 0) {
      folded = -folded;
    }
    if (folded > 1) {
      folded = 2.0 * modf (0.5 * folded, &whole);   // periodicity is 2
      if (folded > 1)
        folded = 2 - folded;
    }

    shift = folded - original;
    penalty += shift * shift;
    a[k] = folded;
  }
  return penalty;
}
164 | double q; 165 | int k, t = 0; 166 | double *tt; 167 | double penalty = 0; 168 | 169 | ZALLOC (tt, xnmix, double); 170 | for (k = 0; k < xnmix; ++k) { 171 | tt[k] = gsl_vector_get (v, k); 172 | } 173 | penalty = fff (tt, xnmix); 174 | 175 | t = 0; 176 | for (k = 0; k < xnmix; ++k) { 177 | xx = tt[k]; 178 | www[t] = xx; 179 | www[t + 1] = 1.0 - xx; 180 | t += 2; 181 | } 182 | q = scorit (www, t, NULL, NULL); 183 | q += 10 * penalty; 184 | if (gsldetails) { 185 | printf ("zzfjunk %9.3f\n", q); 186 | printmat (tt, 1, xnmix); 187 | } 188 | free (tt); 189 | return q; 190 | } 191 | -------------------------------------------------------------------------------- /src/h2d.h: -------------------------------------------------------------------------------- 1 | int mkindh2d (Indiv ** indivmarkers, Indiv *** pindm2, int numindivs) ; 2 | int mkindd2h (Indiv ** indivmarkers, Indiv *** pindm2, int numindivs) ; 3 | 4 | void remaph2d (SNP ** snpmarkers, int numsnps, Indiv ** indivmarkers, 5 | Indiv ** indm2, int numindivs, int numind2) ; 6 | 7 | void remapd2h (SNP ** snpmarkers, int numsnps, Indiv ** indivmarkers, 8 | Indiv ** indm2, int numindivs, int numind2) ; 9 | -------------------------------------------------------------------------------- /src/ldsubs.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | typedef struct { 6 | double S0 ; 7 | double S1 ; 8 | double S2 ; 9 | double S11 ; 10 | double S12 ; 11 | double S22 ; 12 | double m1 ; 13 | double m2 ; 14 | double v11 ; 15 | double v12 ; 16 | double v22 ; 17 | double corr ; 18 | double Z ; 19 | } CORR; 20 | 21 | int calccorr(CORR *corrpt, int mode, int ztrans) ; 22 | void printcorr(CORR *corrpt) ; 23 | void clearcorr(CORR *corrpt) ; 24 | void addcorr(CORR *corrpt, double x1, double x2) ; 25 | void addcorrn(CORR *corrpt, double x1, double x2, double yn) ; 26 | void minuscorr(CORR *out, CORR *c1, CORR *c2) ; 27 | 28 | double 
lddip(double *xc) ; 29 | double zdip(double *xc) ; 30 | void setzdipmode(int mode) ; 31 | void setzdphasedmode(int mode) ; 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | -------------------------------------------------------------------------------- /src/mapfile.c: -------------------------------------------------------------------------------- 1 | /* 2 | * MapFile.c 3 | * 4 | * Created on: Jun 21, 2021 5 | * Author: Cokie; modified by Nick 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "mapfile.h" 19 | #include "admutils.h" 20 | 21 | // Memory mapping by Cokie Parke 22 | 23 | //function returns value of single 2-bit snp, given 24 | //row and col location and genFile object containing data matrix 25 | //return int value 26 | int getgtypemap(genFile *data, int snpRow, int snpCol) 27 | { 28 | char* snps = data->snps; 29 | static long ncall = 0 ; 30 | long rowsize, offset ; 31 | char *buff ; 32 | 33 | ++ncall ; 34 | rowsize = data -> rowSize ; // rowsize is size of a row NOT number of rows 35 | offset = data -> hdrSize ; 36 | 37 | buff = snps + offset + snpRow*rowsize ; // start of row 38 | return rbuff((unsigned char *) buff, snpCol) ; 39 | 40 | } 41 | 42 | //memory maps file with given filename, returns 43 | //genFile struct containing pointer representing file 44 | genFile* constructMap(char* filename, long numrows, long rowSize, long hdrSize) { 45 | genFile* geno; 46 | long siz ; 47 | 48 | //open file to be mapped 49 | int mmfd = open(filename, O_RDWR); 50 | 51 | if (mmfd < 0) fatalx("(constructMap) failure to open %s\n", filename) ; 52 | 53 | //get file length 54 | struct stat status; 55 | if (fstat(mmfd, &status) != 0) fatalx("fstat fails\n") ; 56 | long fileLength = status.st_size; 57 | 58 | siz = numrows * rowSize + hdrSize ; 59 | if (siz != fileLength) fatalx("(constructMqp) file size mismatch\n") ; 60 | 61 | //memory map file 62 | void *mmfaddr = 
mmap(NULL, fileLength, PROT_READ, MAP_PRIVATE, mmfd, 0); 63 | if (mmfaddr == (void*)-1) { 64 | printf("error number: %d/n", errno); 65 | } 66 | 67 | //close file 68 | close(mmfd); 69 | 70 | //build genFile struct 71 | geno = malloc(sizeof(genFile)); 72 | geno->colSize = numrows ; 73 | geno->rowSize = rowSize; 74 | geno -> hdrSize = hdrSize ; 75 | geno->snps = (char*) mmfaddr; 76 | 77 | printf("mmfaddr: %p fileLength: %ld\n", geno -> snps, fileLength) ;; 78 | fflush(stdout) ; 79 | 80 | return geno; 81 | } 82 | 83 | //unmaps data file contained in given genFile struct 84 | //frees dynamically allocated struct container 85 | void destroyMap(genFile* data) { 86 | void* mmfaddr = (void*) data->snps; 87 | int fileLength = data->colSize * data->rowSize + data -> hdrSize; 88 | munmap(mmfaddr, fileLength); 89 | free(data); 90 | } 91 | -------------------------------------------------------------------------------- /src/mapfile.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MapFile.h 3 | * 4 | * Created on: Jun 21, 2021 5 | * Author: csf 6 | */ 7 | 8 | #ifndef MAPFILE_H_ 9 | #define MAPFILE_H_ 10 | 11 | #include 12 | 13 | typedef struct { //struct containing pointer to genome data file 14 | char* snps; 15 | int rowSize; 16 | int colSize; 17 | int hdrSize ; 18 | } genFile; 19 | 20 | genFile* constructMap(char* fileName, long rowSize, long colSize, long hdrSize); 21 | 22 | //helper function for checking correctness of array, not currently used in main 23 | bool checkArray(long rowSize, long colSize, char* array); 24 | int getgtypemap(genFile *data, int row, int col) ; 25 | 26 | void destroyMap(genFile* data); 27 | 28 | #endif /* MAPFILE_H_ */ 29 | -------------------------------------------------------------------------------- /src/mcmcpars.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | double thp1=1.0, thp2 = 5.0 ; /* params for 
theta */ 6 | double thxp1=1.0, thxp2 = 10.0 ; /* params for theta X */ 7 | double thxp0 = 40 ; /* cross term */ 8 | /* -1 is default value (=0 logically) */ 9 | 10 | double lp1 = 10.2, lp2 = 2 ; 11 | double lxp1 = 10.2, lxp2 = 2 ; 12 | 13 | double priorlmean = 6 ; 14 | double priorlmsig = 5 ; 15 | /* hyperprior on mean, s.dev for gamma prior on lambda */ 16 | 17 | double qtrbase = 0.0 ; 18 | 19 | double loclip = -20.0 ; 20 | double hiclip = 15.0 ; 21 | 22 | double a1 = 2, b1= 8 ; 23 | double aa2 = 2, bb2 = 14, cc2 = 85 ; 24 | double p1 = 18, psi1 = 3 ; 25 | /* for toys */ 26 | 27 | double muval = 0.0, tmumean = 0.2, muval1 ; 28 | /* for bridge sampler */ 29 | 30 | int pubxindiv = -1 ; 31 | int alkesmode = NO ; 32 | int malexhet = NO ; 33 | int familynames = YES ; 34 | 35 | int decim = 0 , dmindis = 200000, dmaxdis = 500000 ; // decimation parameters 36 | int hashcheck = YES ; 37 | int outputall = NO ; 38 | int sevencolumnped = NO ; 39 | 40 | FILE *fstdetails = NULL; 41 | 42 | #ifdef __cplusplus 43 | } 44 | #endif 45 | -------------------------------------------------------------------------------- /src/nagqp.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | extern int verbose; 12 | 13 | 14 | double scorit (double *www, int n, double *pfix, double *ans); 15 | static int windex, pplen; 16 | static double *ppp, *ppbest, scbest; 17 | void normvec (double *www, int n); 18 | void printmix (); 19 | 20 | double nagopt1 (double lo, double hi, double *plam, int wind); 21 | void nagxfun (double xp, double *nagxval, Nag_Comm * comm); 22 | 23 | void 24 | myfun (Integer n, double *xc, double *fc, Nag_Comm * comm) 25 | { 26 | int nparams = n; 27 | double *params; 28 | double y, yval, yfix; 29 | int i; 30 | 31 | ZALLOC (params, nparams, double); 32 | copyarr (xc, params, n); 33 | 34 | yval = scorit (params, nparams, &yfix, NULL); 35 | 36 | 
*fc = yval + 1000 * yfix; 37 | //printf("zz %15.9f %15.9f\n", *fc, yfix) ; 38 | 39 | free (params); 40 | 41 | } 42 | 43 | void 44 | simpest (double *params, int n) 45 | { 46 | double oldval, lam, val; 47 | int iter, maxiter = 50, t, k, ind, miniter = 5; 48 | double xlimit = 1.0; // insist on this much improvement 49 | double xdiff, tmp; 50 | 51 | ZALLOC (ppp, n, double); 52 | ZALLOC (ppbest, n, double); 53 | pplen = n; 54 | 55 | copyarr (params, ppp, n); 56 | copyarr (params, ppbest, n); 57 | 58 | scbest = scorit (ppp, n, NULL, NULL); 59 | printf ("simpest: %4d %4d %9.3f\n", 0, -1, scbest); 60 | printmix (); 61 | 62 | for (iter = 1; iter <= maxiter; ++iter) { 63 | oldval = scbest; 64 | for (t = 0; t < n; ++t) { 65 | /** 66 | getvmind(t, &k, &ind) ; 67 | if (ind == 0) continue ; 68 | */ 69 | lam = ppbest[t]; 70 | val = nagopt1 (0.0, 5.0, &lam, t); 71 | if (val < scbest) { 72 | ppbest[t] = lam; 73 | normvec (ppbest, n); 74 | scbest = val; 75 | printf ("simpest: %4d %4d %9.3f\n", iter, t, scbest); 76 | printmix (); 77 | } 78 | } 79 | xdiff = oldval - scbest; 80 | if ((iter >= miniter) && (xdiff < xlimit)) 81 | break; 82 | } 83 | 84 | copyarr (ppbest, params, n); 85 | normvec (params, n); 86 | val = scorit (params, n, NULL, NULL); 87 | printf ("simpest final: %4d %9.3f\n", iter, val); 88 | printmix (); 89 | 90 | 91 | free (ppp); 92 | free (ppbest); 93 | 94 | } 95 | 96 | void 97 | nagopt (double *params, int n) 98 | { 99 | 100 | double val; 101 | Nag_E04_Opt options; 102 | static NagError fail; 103 | double *ycoeffs; 104 | static int ncall = 0; 105 | int nparams = n, i; 106 | double yval, oldyval, tmp; 107 | 108 | if (n == 0) 109 | return; 110 | simpest (params, n); 111 | 112 | ++ncall; 113 | if (ncall == 1) 114 | printf ("calling nag\n"); 115 | e04xxc (&options); 116 | 117 | options.print_level = Nag_NoPrint; 118 | if (verbose) 119 | options.print_level = Nag_Soln; 120 | options.optim_tol = .00001; 121 | options.max_iter = n * 200; 122 | 123 | options.list = FALSE; 124 
| 125 | fail.print = TRUE; 126 | verbose = NO; 127 | 128 | ZALLOC (ycoeffs, nparams, double); 129 | copyarr (params, ycoeffs, nparams); 130 | 131 | oldyval = yval = scorit (params, nparams, NULL, NULL); 132 | 133 | /** 134 | printf("nagopt ncall: %d value: %9.3f\n", ncall, yval) ; 135 | 136 | for (i=0; i 5 | #include 6 | #include 7 | #include 8 | 9 | double psi(double x) ; 10 | double tau(double x) ; 11 | void mleg(double a1, double a2, double *p, double *lam) ; 12 | double chisqtail(double x, double df) ; 13 | 14 | void eigx(double *evec, double *eval, 15 | double *mat, int *nval, int n) ; 16 | 17 | void linsolv(double *ans, double *mat, 18 | double *rhs, int n) ; 19 | 20 | 21 | #define YES 1 22 | #define NO 0 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | -------------------------------------------------------------------------------- /src/nickhash.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include "mcmcpars.h" 12 | #include "globals.h" 13 | #include "admutils.h" 14 | 15 | #define WVERSION "100" 16 | 17 | extern int verbose ; 18 | int qtmode = NO ; 19 | int dolower = NO ; 20 | 21 | char *iname = NULL ; 22 | 23 | void readcommands(int argc, char **argv) ; 24 | 25 | 26 | int main(int argc, char **argv) 27 | { 28 | 29 | char **names ; 30 | int n, z ; 31 | char ss[10] ; 32 | 33 | // printf("hello world!\n") ; 34 | 35 | readcommands(argc, argv) ; 36 | if (iname == NULL) { 37 | printf("Usage: nickhash -i [-l]\n") ; 38 | return -1 ; 39 | } 40 | 41 | n = numlines(iname) ; 42 | ZALLOC(names, n, char *) ; 43 | n = getlist(iname, names) ; 44 | 45 | z = hasharr(names, n) ; 46 | sprintf(ss, "%08X", z) ; 47 | if (dolower) mklower(ss) ; 48 | printf("%s", ss) ; 49 | 50 | printnl() ; 51 | 52 | return 0 ; 53 | 54 | } 55 | 56 | void readcommands(int argc, char **argv) 57 | 58 | { 59 | int i ; 60 | 61 | while 
((i = getopt (argc, argv, "i:lvV")) != -1) { 62 | 63 | switch (i) 64 | { 65 | 66 | case 'i': 67 | iname = strdup(optarg) ; 68 | break; 69 | 70 | case 'v': 71 | printf("version: %s\n", WVERSION) ; 72 | break ; 73 | 74 | case 'V': 75 | verbose = YES ; 76 | break ; 77 | 78 | case 'l': 79 | dolower = YES ; 80 | break ; 81 | 82 | 83 | } 84 | 85 | } 86 | 87 | return ; 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/nicklib.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | -------------------------------------------------------------------------------- /src/nicksam.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "faidx.h" 3 | 4 | #ifndef NJPSAM 5 | 6 | #define MAXD 100 7 | #define MAXDD 10 8 | #define MAXPOP 299 9 | #define MAXPOPX 300 10 | #define MAXLEN 2000 11 | 12 | typedef struct { 13 | char *faname ; 14 | char *alias ; 15 | char *famask ; 16 | int lopos ; 17 | int hipos ; 18 | int len ; 19 | int rlen ; 20 | int mlen ; 21 | int popnum ; 22 | int isref ; 23 | char *regname ; 24 | char *rstring ; 25 | char *mstring ; 26 | faidx_t *fai ; 27 | faidx_t *faimask ; 28 | } FATYPE ; 29 | 30 | typedef struct { 31 | char qname[MAXLEN] ; 32 | char pop[MAXLEN] ; 33 | int ipop ; 34 | int ilib ; 35 | int ihdr ; 36 | int isamp ; 37 | int flag ; 38 | int isnean ; 39 | int isxw ; 40 | int isancient ; 41 | int strand ; 42 | char rname[MAXLEN] ; 43 | int pos ; 44 | int mapq ; 45 | char cigar[MAXLEN]; 46 | char mrname[MAXLEN] ; // null => rname "=" => rname 47 | int mpos ; 48 | int isize ; 49 | int bonenumber ; 50 | char seq[MAXLEN] ; 51 | char qual[MAXLEN] ; 52 | int iqual[MAXLEN] ; 53 | int diff[MAXLEN] ; 54 | int readlen ; // strlen(seq) = 
strlen(qual) 55 | char refseq[MAXLEN] ; 56 | int posdiff[MAXLEN] ; 57 | int numposdiff ; 58 | char lib[MAXLEN] ; 59 | char samp[MAXLEN] ; 60 | char rgstring[MAXLEN] ; 61 | int rgcat ; 62 | int mapthresh ; 63 | int lobasequal ; 64 | int ispair ; 65 | int isdup ; 66 | int rckode ; 67 | int isfix ; 68 | double pmdscore ; 69 | char *readbuff ; 70 | int zz[3] ; 71 | void *parent ; 72 | } READ ; 73 | 74 | typedef struct { 75 | char *chrom ; 76 | int cpos ; 77 | int len ; 78 | char *seq ; 79 | int *aqual ; 80 | } AREC ; 81 | 82 | typedef struct { 83 | char refbase ; 84 | int ipop ; 85 | int pos ; 86 | char bases[MAXD] ; 87 | char base ; 88 | char last ; 89 | char next ; 90 | int sbase ; // strand for base 91 | int qual[MAXD] ; 92 | int diff[MAXD] ; 93 | int strand[MAXD] ; 94 | double pmd[MAXD] ; 95 | int depth ; 96 | int bonenumber[MAXD] ; 97 | int libnumber[MAXD] ; 98 | char strandbase[2] ; 99 | int mapq[MAXD] ; 100 | int pickmapq ; 101 | int pickbonenumber ; 102 | int pickindex ; 103 | } BASEP ; 104 | 105 | typedef struct { 106 | char refbase ; 107 | char refnext ; 108 | char reflast ; 109 | int pos ; 110 | BASEP *bp[MAXPOPX] ; 111 | int isprocessed ; 112 | } PILEUP ; 113 | 114 | typedef struct { 115 | int lomapthresh ; 116 | int himapthresh ; 117 | int quallo ; 118 | int qualhi ; 119 | int lobasedepth ; 120 | int hibasedepth ; 121 | int skiplength ; 122 | int ispair ; 123 | int setthresh ; 124 | double pmdlo ; 125 | double pmdhi ; 126 | } THRESH ; 127 | 128 | 129 | typedef struct { 130 | int mq ; 131 | int bq ; 132 | int strand ; 133 | int pp ; // - is 3' end 134 | int hist[16] ; 135 | int hsum ; 136 | int ignore ; 137 | } HIST ; 138 | 139 | typedef struct { 140 | int mq ; 141 | int bq ; 142 | int strand ; 143 | int pp ; // - is 3' end 144 | double deamin[2] ; 145 | int ignore ; 146 | // int flag[6] ; 147 | int hitcount ; 148 | int hist[16] ; 149 | // double bcprob[16] ; 150 | } HISTS ; 151 | 152 | 153 | typedef struct { 154 | int maxmq ; 155 | int maxbq ; 156 | int 
nump ; 157 | int isfix ; 158 | char *sampname ; 159 | char *bamname ; 160 | char *histname ; 161 | int bighistlen ; 162 | double baserate[6] ; 163 | double basedeamin[6] ; 164 | char deaminbases[5] ; // pos 1 -1 strand 0; 1 -1 strand 1 example: C, G, C, G 165 | int rgcat ; 166 | HISTS ***bighistlist ; 167 | } HISTINFO ; 168 | 169 | 170 | typedef struct { 171 | int kodeindex ; 172 | long hist ; 173 | char *string ; 174 | } KINFO ; 175 | 176 | #endif 177 | #define NJPSAM 178 | 179 | void clearkh(KINFO *kpt) ; 180 | void addkhist(int kode, int val) ; 181 | void addkbhist(int kode, int val) ; 182 | void printkinfo(KINFO **kh, int len) ; 183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /src/nicksrc/.gitignore: -------------------------------------------------------------------------------- 1 | libnick.a 2 | -------------------------------------------------------------------------------- /src/nicksrc/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009-2016, Broad Institute, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | 12 | * 13 | Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 18 | * 19 | Neither the name Broad Institute, Inc. nor the names of its 20 | contributors may be used to endorse or promote products derived from 21 | this software without specific prior written permission. 
22 | 23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 32 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE 33 | -------------------------------------------------------------------------------- /src/nicksrc/Makefile: -------------------------------------------------------------------------------- 1 | override CFLAGS += -c -g -p -Wimplicit -fno-common -I./ 2 | 3 | all: libnick.a 4 | 5 | libnick.a: strsubs.o sortit.o vsubs.o statsubs.o linsubs.o getpars.o xsearch.o gauss.o gds.o 6 | ar -r libnick.a $^ 7 | ranlib libnick.a 8 | 9 | clean: 10 | rm -f *.o 11 | rm -f libnick.a 12 | -------------------------------------------------------------------------------- /src/nicksrc/gauss.c: -------------------------------------------------------------------------------- 1 | #include "ranmath.h" 2 | 3 | double 4 | gauss () 5 | { 6 | 7 | /** 8 | Numer alg. 
/*
 * gcdx: extended Euclidean algorithm.
 *
 * Returns g = gcd(a, b) and stores coefficients in *x and *y such that
 *
 *     g = a * (*x) + b * (*y)
 *
 * Base case: a == 0 gives g = b with x = 0, y = 1.
 * Otherwise the problem is solved for (b % a, a), yielding x1, y1 with
 * g = (b % a) * x1 + a * y1, and since b % a = b - (b / a) * a this
 * rearranges to x = y1 - (b / a) * x1, y = x1.
 *
 * The recursive call must therefore come BEFORE x and y are computed; the
 * earlier version computed them from the still-uninitialised x1 / y1 and
 * only then recursed, so the returned coefficients were garbage.
 */
long gcdx(long a, long b, long *x, long *y)
{
  long x1, y1;                  // coefficients returned by the recursive call
  long g;

  if (a == 0)
  {
    *x = 0;
    *y = 1;
    return b;
  }

  g = gcdx(b % a, a, &x1, &y1);

  // Update x and y using results of recursive call
  *x = y1 - (b / a) * x1;
  *y = x1;

  return g;
}
double *dbl) ; 21 | int getdbls(phandle *pp, char *parname, double *dbl, int ndbl) ; 22 | int getdblss(phandle *pp, char *parname, double *dbl, int *ndbl) ; 23 | int subst(char *outstr, char *instr, char *ins, char *outs) ; 24 | void dostrsub(phandle *pp) ; 25 | int upstring (char *ss) ; 26 | void subcolon(char *ss) ; 27 | 28 | #define MAXFIELD 1000 29 | -------------------------------------------------------------------------------- /src/nicksrc/linmake: -------------------------------------------------------------------------------- 1 | HOMEL=/home/np29 2 | DEBUG_OPTIONS= -g -pg 3 | 4 | IDIR=$(HOMEL)/include 5 | NLIB=$(HOMEL)/lib/nicklib.a 6 | TWTAB=\"$(HOMEL)/tables/twtable\" 7 | ## this MUST be set 8 | 9 | CFLAGS= -c -g -p -Wimplicit -I$(IDIR) 10 | 11 | OBJ=strsubs.o sortit.o vsubs.o statsubs.o linsubs.o getpars.o xsearch.o gauss.o gds.o 12 | 13 | P4=twstats 14 | P4O=twstats.o 15 | 16 | T1=ttt 17 | T1O=ttt.o 18 | 19 | statsubs.o: statsubs.c 20 | cc $(CFLAGS) -DTWTAB=$(TWTAB) -o statsubs.o statsubs.c 21 | 22 | $(P1): $(P1O) 23 | gcc $(DEBUG_OPTIONS) -I $(IDIR) -lm -o $(P1) $(P1O) $(NLIB) 24 | 25 | $(P2): $(P2O) 26 | gcc $(DEBUG_OPTIONS) -I $(IDIR) -lm -o $(P2) $(P2O) $(NLIB) 27 | 28 | 29 | $(P3): $(P3O) 30 | gcc $(DEBUG_OPTIONS) -I$(IDIR) -lm -o $(P3) $(P3O) $(NLIB) 31 | 32 | $(P4): $(P4O) 33 | gcc $(DEBUG_OPTIONS) -I$(IDIR) -lm -o $(P4) $(P4O) $(NLIB) 34 | 35 | $(T1): $(T1O) 36 | gcc $(DEBUG_OPTIONS) -I$(IDIR) -lm -o $(T1) $(T1O) $(NLIB) 37 | 38 | ##libnick.a: $(OBJ) 39 | libnick.a: $(OBJ) 40 | ar -r libnick.a $(OBJ) 41 | 42 | nicklib: dirs libnick.a 43 | cp *.h $(IDIR) 44 | cp libnick.a $(NLIB) 45 | 46 | dirs: 47 | mkdir -p $(HOMEL)/lib 48 | mkdir -p $(HOMEL)/tables 49 | mkdir -p $(HOMEL)/include 50 | 51 | clean: 52 | rm -f *.o 53 | rm -f core 54 | 55 | clobber: clean 56 | rm libnick.a 57 | 58 | -------------------------------------------------------------------------------- /src/nicksrc/linsolve.c: 
-------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Linear System Solution by Gauss method 4 | // 5 | // Developer: Henry Guennadi Levkin 6 | // 7 | //============================================================================== 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | //============================================================================== 14 | // return 1 if system not solving 15 | // nDim - system dimension 16 | // pfMatr - matrix with coefficients 17 | // pfVect - vector with free members 18 | // pfSolution - vector with system solution 19 | // pfMatr becames trianglular after function call 20 | // pfVect changes after function call 21 | // 22 | // Developer: Henry Guennadi Levkin 23 | // 24 | //============================================================================== 25 | int LinearEquationsSolving(int nDim, double* pfMatr, double* pfVect, double* pfSolution) 26 | { 27 | double fMaxElem; 28 | double fAcc; 29 | 30 | int i , j, k, m; 31 | 32 | 33 | for(k=0; k<(nDim-1); k++) // base row of matrix 34 | { 35 | // search of line with max element 36 | fMaxElem = fabs( pfMatr[k*nDim + k] ); 37 | m = k; 38 | for(i=k+1; i=0; k--) 76 | { 77 | pfSolution[k] = pfVect[k]; 78 | for(i=(k+1); i 2 | #include 3 | #include 4 | #include 5 | 6 | void bal(double *a, double *b, int n) ; 7 | 8 | /* linear algebra */ 9 | void mulmat(double *a, double *b, double *c, int a1, int a2, int a3) ; 10 | void xmultx(double *a, double *b, int m, int n) ; 11 | void txmulx(double *a, double *b, int m, int n) ; 12 | 13 | int solvit (double *prod, double *rhs,int n, double *ans); 14 | int solvitfix (double *prod, double *rhs, int n, double *ans, int *vfix, double *vvals, int nfix) ; 15 | int oldsolvitfix (double *prod, double *rhs, int n, double *ans, int *vfix, double *vvals, int nfix) ; 16 | double pdinv(double *cinv, double *coeff, int n) ; 17 
| 18 | /* numer recipes p 97 */ 19 | double logdet(double *mat, int n) ; 20 | int choldc (double *a, int n, double p[]); 21 | void cholsl (double *a, int n, double p[], double b[], double x[]); 22 | int isposdef (double *a, int n) ; 23 | void cholesky(double *cf, double *a, int n) ; 24 | void pmat(double *mat, int n) ; 25 | void imulmat(int *a, int *b, int *c, int a1, int a2, int a3) ; 26 | int linsolv(int n, double* pfMatr, double* pfVect, double* sol) ; // Developer: Henry Guennadi Levkin 27 | 28 | double qval(double *vv, double *q, double *l, int n) ; 29 | void qgrad(double *grad, double *vv, double *q, double *l, int n) ; 30 | double mquad(double y0, double y1, double y2, double *pmx) ; 31 | double qminpos(double *vv, double *q, double *l, int n) ; 32 | double qminposfix(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix) ; 33 | double qminposfixc(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix, int *constraint) ; 34 | double qmin(double *vv, double *q, double *l, int n) ; 35 | double qminfix(double *vv, double *q, double *l, int n, int *fixlist, double *fixvals, int nfix) ;; 36 | double qmpc (double *vnew, double *vold, double *q, double *l, int *dead, int level, int *constraint, int n) ; 37 | double qmp (double *vnew, double *vold, double *q, double *l, int *dead, int level, int n) ; 38 | -------------------------------------------------------------------------------- /src/nicksrc/nicklib.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | -------------------------------------------------------------------------------- /src/nicksrc/ranmath.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | 5 | #include 6 | 7 | #define BIGINT INT_MAX 8 | #define SRAND srandom 9 | #define LRAND random 10 | #define DRAND() 
( (double) (random() % BIGINT) / (double) (BIGINT)) 11 | #define DRAND2() ( drand2() ) 12 | /* random must return random integer in range 0 to BIGINT-1 */ 13 | 14 | 15 | #define NORMAL gauss 16 | 17 | double gauss() ; // standard normal 18 | void gaussa(double *a, int n) ; // array of standard normals 19 | double gds(double a) ; // obsolete 20 | double poidev(double mean) ; // obsolete 21 | double ranpoiss(double mean) ; // poisson Note double 22 | double ranpoissx(double mean) ; // poisson | > 0 23 | void ranperm(int *a, int n) ; // randomly permute a; if random permulation wanted : idperm(a,n) ; ranperm(a,n) 24 | double ranexp( void) ; // exponential mean 1 25 | double rangam(double a) ; // standard gamma mean a 26 | int randis(double *a, int n) ; // element from discrete distribution a 27 | void ransamp(int *samp, int nsamp, double *p, int plen) ; // sample nsamp samples from p 28 | void pick2(int n, int *k1, int *k2) ; // pick 2 elements from 0..n-1 29 | int ranmod(int n) ; // random mod n 30 | int iranpick(int lo, int hi) ; // random int in [lo, hi] 31 | double ranbeta(double a, double b) ; // beta 32 | int ranbinom(int n, double p) ; // binomial 33 | void setrand(double *vv, int n) ; // filll vv with U[0,1] 34 | int ewens(int *a, int n, double theta) ; // ewens sampling formula 35 | void genmultgauss(double *rvec, int num, int n, double *covar) ; // multivariate 36 | double drand2() ; 37 | void ranmultinom(int *samp, int n, double *p, int len) ; // multinomial 38 | double ranchi (int d) ; // chisq d dof. 39 | void raninvwis(double *wis, int t, int d, double *s) ; // inverse wishart 40 | double uniform(double lo, double hi) ; // uniform (lo..hi) 41 | void ransimplex(double *x, int n) ; // uniform on n-simplex 42 | void randirichlet(double *x, double *pp, int n) ; // dirichlet parameter vector pp 43 | void randirmult(double *pp, int *aa, int len, int m) ; // dirichlet multinomial. 
Output aa 44 | int prob1(double p) ; // return YES with probability p 45 | double rant(double df) ; // t distribution 46 | double samppow(double e, double a, double b) ; 47 | double rejnorm(double lo, double hi) ; // usually call ranboundnorm 48 | double ranboundnorm(double lo, double hi) ; // sample standard normal in [lo, hi] 49 | double rtrunc2(double T) ; // sample standard normal > T Rayleigh rejection 50 | double rantruncnorm(double T, int upper) ; // sample standard normal > T (upper =1) < T (upper = 0) 51 | int ranhprob(int n, int a, int m) ; // n balls a block sample m . Fow many black 52 | double rangeom (double theta) ; // Geometric distribution, mean 1/theta 53 | -------------------------------------------------------------------------------- /src/nicksrc/sortit.h: -------------------------------------------------------------------------------- 1 | 2 | void sortit(double *a, int *ind, int len) ; 3 | double median (double *a, int n) ; 4 | int compit (int *a1, int *a2) ; 5 | void isortit(int *a, int *ind, int len) ; 6 | void lsortit(long *a, int *ind, int len) ; 7 | int icompit (int *a1, int *a2) ; 8 | int lcompit (int *a1, int *a2) ; 9 | void invperm(int *a, int *b, int n) ; 10 | int ipcompit (int *a1, int *a2) ; 11 | int comparr(double *a, double *b, int len) ; 12 | int compiarr(int *a, int *b, int len) ; 13 | int complarr(long *a, long *b, int len) ; 14 | void ipsortit(int **a, int *ind, int len, int rlen) ; 15 | void ipsortitp(int **a, int *ind, int len, int rlen, int *pp) ; 16 | void setorder (int *pp, int rlen) ; 17 | void mkirank(int *rank, int *xin, int n) ; 18 | void mkrank(int *rank, double *xin, int n) ; 19 | void sortstrings (char **a, int *ind, int len) ; 20 | int scompit (int *a1, int *a2) ; 21 | 22 | -------------------------------------------------------------------------------- /src/nicksrc/xsearch.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int xfindit(char *ss) ; 4 | int 
xloadsearchx(char **ss, int n) ; 5 | int finddup(char **ss, int n) ; 6 | int finddupalt(char **ss, int n) ; 7 | void xloadsearch(char **ss, int n) ; 8 | void xdestroy() ; 9 | 10 | void xhcreate (int n) ; 11 | void xhinit (int n) ; 12 | void xhdestroy() ; 13 | ENTRY *xhsearch(ENTRY item, ACTION act) ; 14 | 15 | int xlookup(char *key, ACTION act) ; 16 | void xstore(char *key, int val) ; 17 | int xhash (char *key) ; 18 | int xhash1(int ww) ; 19 | int xhash2 (int x) ; 20 | int xcshift(int x, int shft) ; 21 | int stringhash(char *key) ; 22 | void setfancyhash(int val) ; 23 | int getfancyhash() ; 24 | long xlhash (long x) ; 25 | int xshash (int x) ; 26 | int fnv_hash(char *strng) ; 27 | void dumpxs() ; 28 | 29 | #define FNV_PRIME 0x01000193 30 | #define FNV_OFFSET_BASIS 0x811c9dc5 31 | 32 | -------------------------------------------------------------------------------- /src/packit.h: -------------------------------------------------------------------------------- 1 | int packmode = NO ; 2 | unsigned char *packgenos = NULL ; 3 | long packlen = 0 ; 4 | long rlen = -1 ; 5 | int rdismode = NO ; 6 | unsigned char *packepath ; 7 | 8 | -------------------------------------------------------------------------------- /src/qpgsubs.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | #ifdef __cplusplus 5 | } 6 | #endif 7 | -------------------------------------------------------------------------------- /src/qsubs.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | #include "admutils.h" 5 | void setccr(double aa, double bb, double *qrho, double *qtr, int numrisks) ; 6 | void setqscores(Indiv *indx, double *gg, double *qsc) ; 7 | void calcbmv(double aa, double bb, double *mean, double *var) ; 8 | void setqcox(double *qcc, double aa, double bb, 9 | double qrho, double qtrisk) ; 10 | void setqco(double *qc, double 
aa, double bb) ; 11 | void setqscores(Indiv *indx, double *gg, double *qsc) ; 12 | void freecc() ; 13 | double genqval(double *qcc, double theta, double mu, int a) ; 14 | double lnordens(double xval, double sig) ; 15 | double qqlike(double qval, double *cc, double theta, double mu, int a) ; 16 | void probit(double *xout, double *xin, int n) ; 17 | void doprobit(Indiv **indm, int numind) ; 18 | double qqltot(Indiv **indivmarkers, int numindivs, double *ccr, double qmu) ; 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | -------------------------------------------------------------------------------- /src/regsubs.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | double regressit(double *ans, double *eq, double *rhs, int m, int n) ; 11 | void regressitall(char **vname, double *eq, double *rhs, int m, int n) ; 12 | void add1(int *a, int *b, int n) ; 13 | 14 | void ptoz(double *p, double *z, int n) ; 15 | void ztop(double *p, double *z, int n) ; 16 | double logregressit(double *ans, double *eq, double **rhs, int neq, int nv) ; 17 | double logrscore(double *eq, double **rhs, int neq, int nv) ; 18 | 19 | void calcgh(double *grad, double *hess, double *eq, double *z, 20 | double *n0, double *n1, int neq, int nv) ; 21 | 22 | double zlike(double *eq, double *n0, double *n1, 23 | double *ans, int neq, int nv) ; 24 | 25 | void squish(double *xmat, double *mat, int nrow, int oldc, int newc) ; 26 | void rcsquish (double *xmat, double *mat, int *cols, int oldn, int newn) ; 27 | 28 | void 29 | calcres(double *res, double *ans, double *eq, double *rhs, 30 | int neq, int nv) ; 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | double qwmax (double *co, double *ans, int n) ; 36 | void hgrad(double ff(double *xx, int nn), double *x, int n, double step, double *fval, double *grad, double *hess) ; 37 | void xline(double 
*line, double *coeff, int m, int n) ; 38 | -------------------------------------------------------------------------------- /src/script/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | build 3 | nick_hash.c 4 | nick_hash.html 5 | nick_hash.cpython*.so 6 | -------------------------------------------------------------------------------- /src/script/convertf.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | import tempfile 4 | from pathlib import Path 5 | 6 | def buildParFile(fileName, _input, _output, _format, hashCheck = False, **options): 7 | """Builds parameter file for convertf""" 8 | parFileText = f'''genotypename: {_input}.geno 9 | snpname: {_input}.snp 10 | indivname: {_input}.ind 11 | outputformat: {_format} 12 | genotypeoutname: {_output}.geno 13 | snpoutname: {_output}.snp 14 | indivoutname: {_output}.ind''' 15 | if not hashCheck: 16 | parFileText += f'\nhashcheck: NO' 17 | for option, value in options.items(): 18 | parFileText += f'\n{option}: {value}' 19 | parFileText += '\n' 20 | 21 | with open(fileName, 'w') as f: 22 | f.write(parFileText) 23 | 24 | # par_directory gives the option of using a permanent directory instead of a temporary one 25 | # This is if you want to save the par file. 
def convertf(executable_directory, input_stem, output_stem, geno_format, par_directory=None, **options):
    """Run the ``convertf`` executable on one geno/snp/ind file triple.

    A parameter file named ``convertf.par`` is written with buildParFile and
    passed to the executable with ``-p``.

    Args:
        executable_directory: directory containing the ``convertf`` executable.
        input_stem: stem of the input ``.geno``/``.snp``/``.ind`` files.
        output_stem: stem of the output ``.geno``/``.snp``/``.ind`` files.
        geno_format: value for the ``outputformat`` parameter.
        par_directory: if given, the par file is written there and kept;
            otherwise a temporary directory is used and removed afterwards.
        **options: extra ``key: value`` lines appended to the par file.

    Raises:
        subprocess.CalledProcessError: if convertf exits with a non-zero status.
    """
    executable = Path(executable_directory) / 'convertf'

    def run_with_par_in(directory):
        par_file = str(Path(directory) / 'convertf.par')
        # hashCheck=True: buildParFile then does not add a "hashcheck: NO" line
        buildParFile(par_file, input_stem, output_stem, geno_format, True, **options)
        subprocess.run([str(executable), '-p', par_file], check=True)

    if par_directory:
        # A permanent directory was requested: no temporary directory is needed.
        run_with_par_in(par_directory)
    else:
        with tempfile.TemporaryDirectory() as temp_directory:
            run_with_par_in(temp_directory)
def geno_header(filename, individual_hash=None, snp_hash=None, header_size=48, **kwargs):
    """Print and parse the text header of a GENO/TGENO genotype file.

    The first ``header_size`` bytes are read, NUL padding is stripped, and the
    remaining text is split on whitespace into: format, number of samples,
    number of SNPs, individual hash (hex) and SNP hash (hex).

    If ``individual_hash`` and/or ``snp_hash`` is given (hex strings), the
    header is rewritten in place with the replaced value(s) and NUL-padded
    back to ``header_size`` bytes.

    Args:
        filename: path of the genotype file.
        individual_hash: optional hex string replacing the individual hash.
        snp_hash: optional hex string replacing the SNP hash.
        header_size: fixed header size in bytes.
        **kwargs: accepted and ignored.

    Returns:
        (file_format, num_samples, num_snps, individual_hash_hex, snp_hash_hex)
        where the two hashes are lowercase hex strings without a prefix.

    Raises:
        ValueError: unsupported format, header with fewer than five fields,
            non-numeric fields, or a rewritten header longer than
            ``header_size``.
    """
    rewrite_header = individual_hash is not None or snp_hash is not None
    # open read/write only when the header is actually going to be replaced
    mode = 'rb+' if rewrite_header else 'rb'
    with open(filename, mode) as f:
        original_header = f.read(header_size)

        truncated_header = original_header.decode('utf-8').strip('\x00')
        fields = truncated_header.split()
        if not fields:
            raise ValueError(f'Empty header in {filename}')
        file_format = fields[0]
        if file_format != 'GENO' and file_format != 'TGENO':
            raise ValueError(f'Unsupported file format {file_format}. Only GENO and TGENO are supported.')
        if len(fields) < 5:
            # previously surfaced as a bare IndexError
            raise ValueError(f'Malformed header in {filename}: expected 5 fields, found {len(fields)}')

        num_samples = int(fields[1])
        num_snps = int(fields[2])
        individual_hash_int = int(fields[3], 16)
        snp_hash_int = int(fields[4], 16)

        print(truncated_header)

        if rewrite_header:
            if individual_hash is not None:
                individual_hash_int = int(individual_hash, 16)
            if snp_hash is not None:
                snp_hash_int = int(snp_hash, 16)

            header_string = f'{file_format} {num_samples:7d} {num_snps:7d} {individual_hash_int:x} {snp_hash_int:x}'

            print(header_string)
            header = bytes(header_string, 'utf-8')
            bytes_to_pad = header_size - len(header)
            if bytes_to_pad < 0:
                raise ValueError(f'Required header size is too big {len(header)} for fixed size header {header_size}')
            f.seek(0)
            f.write(header)
            # pad with null so the header keeps its fixed size
            if bytes_to_pad > 0:
                f.write(b'\x00' * bytes_to_pad)
    return file_format, num_samples, num_snps, f'{individual_hash_int:x}', f'{snp_hash_int:x}'

def add_to_kwargs(kwargs, args, property_name):
    """Copy ``args.<property_name>`` into ``kwargs`` if that attribute exists."""
    try:
        value = getattr(args, property_name)
        kwargs[property_name] = value
    except AttributeError:
        pass
#!/usr/bin/perl -w

# grepcol col val < in > out
#
# Copies to STDOUT every line of STDIN whose whitespace-separated column
# $col equals $grepstr (string comparison).  Columns are 0-based; a negative
# $col counts from the end of the line.  Lines whose first field starts with
# '#' are passed through unchanged (and not counted), provided column $col
# exists on them.  The number of matching lines is reported on STDERR.

($col, $grepstr) = @ARGV ;

die "Usage: grepcol col val < in >out" unless defined ($grepstr) ;

$num = 0 ;
for (;;) {
 last if (eof(STDIN)) ;
 $line = <STDIN> ;
 chomp $line ;
 @XX = split " ", $line ;
 # skip blank lines and lines too short to have the requested column
 next unless(defined $XX[$col]) ;
 if ($XX[0] =~ /^\#/) {
  print "$line\n" ;
  next ;
 }
 $tcol = $col ;
 if ($col < 0) {
  # translate a negative index into the equivalent non-negative one
  $n = @XX ;
  $tcol = $n + $col ;
 }
 next unless ($XX[$tcol] eq $grepstr) ;
 print "$line\n" ;
 ++$num ;
}
print STDERR "$num lines written\n" ;
# returns True if hash checks pass
def hash_checks(geno_file, ind_file, snp_file):
    """Check that a genotype file's header hashes match its ind and snp files.

    The individual and SNP hashes are computed with ind_hash / snp_hash and
    compared with the two hashes returned by geno_header for ``geno_file``.

    Args:
        geno_file: genotype file whose header is read.
        ind_file: individual file to hash.
        snp_file: SNP file to hash.

    Returns:
        True when both hashes match.

    Raises:
        ValueError: if either hash differs from the value in the header.
    """
    individual_file_hash = ind_hash(ind_file)
    snp_file_hash = snp_hash(snp_file)

    # only the two hashes of the header tuple are needed here
    _file_format, _num_samples, _num_snps, geno_ind_hash, geno_snp_hash = geno_header(geno_file)
    if individual_file_hash != geno_ind_hash:
        raise ValueError(f'individual hash mismatch {individual_file_hash} {geno_ind_hash}')
    if snp_file_hash != geno_snp_hash:
        raise ValueError(f'snp hash mismatch {snp_file_hash} {geno_snp_hash}')
    return True
def hash_program(f):
    """Return the SHA-256 hex digest of the contents of file ``f``.

    Computed in-process with hashlib, so it does not depend on an external
    ``sha256sum`` binary and is not confused by whitespace or backslashes in
    the file name.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    import hashlib  # local import keeps this block self-contained

    digest = hashlib.sha256()
    with open(f, 'rb') as stream:
        # read in 1 MiB chunks so large files are not loaded into memory
        for chunk in iter(lambda: stream.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()

def link_identical_files(reference, to_compare, verbose=False):
    """Replace each file identical to ``reference`` by a symlink to it.

    Args:
        reference: path of the reference file (kept as a regular file).
        to_compare: iterable of paths; each one whose SHA-256 equals the
            reference's is unlinked and recreated as a symbolic link.
        verbose: when True, print the hashes (and skipped files) to stderr.

    An entry that already is the reference (same path, hard link, or an
    existing symlink to it) is skipped: unlinking it would destroy the
    reference or just recreate the same link.

    A relative ``reference`` produces a relative link target computed from the
    directory of the link, so the link resolves even when the compared file
    lives in a different directory.  An absolute ``reference`` is used as is.
    """
    import os  # local import keeps this block self-contained

    reference_path = Path(reference)
    reference_hash = hash_program(reference)
    if verbose:
        print(f'reference hash {reference_hash}', file=sys.stderr)
    for f in to_compare:
        path = Path(f)
        if os.path.samefile(path, reference_path):
            if verbose:
                print(f'skipping {f}: same file as reference', file=sys.stderr)
            continue
        comparison_hash = hash_program(f)
        if verbose:
            print(f'comparison hash {comparison_hash}', file=sys.stderr)
        if reference_hash == comparison_hash:
            print(f'symlinking {f}', file=sys.stderr)
            if reference_path.is_absolute():
                target = reference_path
            else:
                # symlink targets are resolved relative to the link's directory,
                # not the current working directory
                target = os.path.relpath(reference_path.resolve(), start=path.parent.resolve())
            path.unlink()
            path.symlink_to(target)
# nickhash executable is expected one directory above this script
inferred_executable = Path(__file__).resolve().parent.parent / 'nickhash'

# Nick's implementation hashes the first column and ignores everything else
def hash_single_file(filename, executable=inferred_executable):
    """Run ``executable -i filename`` and return its hash output.

    The program's stdout is parsed as a hexadecimal integer and returned as
    a lowercase hex string without prefix.

    Raises:
        subprocess.CalledProcessError: if the executable exits non-zero.
        ValueError: if its output is not a hexadecimal number.
    """
    command = [executable, '-i', filename]
    completed = subprocess.run(command, check=True, stdout=subprocess.PIPE, universal_newlines=True)
    parsed = int(completed.stdout.strip(), 16)
    return format(parsed, 'x')

def ind_hash(filename, executable=inferred_executable):
    """Hash of an individual (ind) file; same computation as hash_single_file."""
    return hash_single_file(filename, executable)

def snp_hash(filename, executable=inferred_executable):
    """Hash of a snp file; same computation as hash_single_file."""
    return hash_single_file(filename, executable)
# we are only interested in the read group id and library parts of the read group header
def parse_id_and_library(read_group_line):
    """Extract the ``ID:`` and ``LB:`` values from one ``@RG`` header line.

    Header fields are TAB-delimited and a field value may itself contain
    spaces, so the line is split on tabs whenever it contains one.  Lines
    without any tab fall back to whitespace splitting, as before.

    Args:
        read_group_line: one ``@RG`` line of a SAM/BAM header.

    Returns:
        (read_group_id, library); either element is None if its field is
        absent.

    Raises:
        ValueError: if the line has more than one ``ID:`` or ``LB:`` field.
    """
    read_group_id = None
    library = None

    stripped = read_group_line.rstrip('\r\n')
    fields = stripped.split('\t') if '\t' in stripped else stripped.split()
    for field in fields:
        if field.startswith('ID:'):
            if read_group_id is not None:
                raise ValueError(f'multiple read group id fields {read_group_id}, {field}')
            read_group_id = field[3:]
        elif field.startswith('LB:'):
            if library is not None:
                raise ValueError(f'multiple library id fields {library}, {field}')
            library = field[3:]
    return read_group_id, library
def read_group_categories(bam, output_filename, restrict_to=None):
    """Assign every selected read group of ``bam`` its own category number.

    Writes one ``<read_group_id> <category>`` line per read group through
    output_lines, numbering categories 0, 1, 2, ... in header order.  When
    ``restrict_to`` is not None, only read group ids contained in it are kept.

    Returns:
        The number of lines (categories) written.
    """
    selected_ids = [
        read_group_id
        for read_group_id, _library in samtools_read_group_ids_and_libraries(bam)
        if restrict_to is None or read_group_id in restrict_to
    ]
    lines = [f'{read_group_id} {category}' for category, read_group_id in enumerate(selected_ids)]
    output_lines(output_filename, lines)
    return len(lines)

def read_group_categories_by_library(bam, output_filename, restrict_to=None):
    """Assign selected read groups of ``bam`` to categories, one per library.

    Read groups sharing the same library value share a category number;
    numbers are handed out in order of first appearance.  Writes one
    ``<read_group_id> <category>`` line per read group through output_lines.
    When ``restrict_to`` is not None, only read group ids contained in it are
    kept.

    Returns:
        The number of distinct libraries seen among the selected read groups.
    """
    category_of_library = {}
    lines = []
    for read_group_id, library in samtools_read_group_ids_and_libraries(bam):
        if restrict_to is not None and read_group_id not in restrict_to:
            continue
        # a library seen for the first time gets the next unused number
        category = category_of_library.setdefault(library, len(category_of_library))
        lines.append(f'{read_group_id} {category}')
    output_lines(output_filename, lines)
    return len(category_of_library)
#!/usr/bin/perl -w

# xtractcol COLS [xflag] < in > out
#
# Prints the requested whitespace-separated columns of every line of STDIN.
# COLS is a single column index or a colon-separated list (e.g. 0:3:-1);
# indices are 0-based and a negative index counts from the end of the line.
# Blank lines and lines whose first field contains '#' are dropped, unless
# xflag is true, in which case they are copied through unchanged.
# Dies if a requested column is missing on a data line.

($COLS, $xflag) = @ARGV ;

die "no columns\n" unless (defined $COLS) ;
if ($COLS =~ /:/) {
 @CC = split ":", $COLS ;
}
else {
 $CC[0] = $COLS ;
}

for (;;) {
 last if (eof(STDIN)) ;
 $line = <STDIN> ;
 chomp $line ;
 @XX = split " ", $line ;
 if (!defined $XX[0]) {print "$line\n" if ($xflag); next; } ## skip blanks
 $f = $XX[0] ;
 if ($f =~ /\#/) {print "$line\n" if ($xflag); next; }
 $ncc = @CC ;
 foreach $x (1..$ncc) {
  $nn = $CC[$x-1] ;
  $n = $nn ;
  $xx = @XX ;
  $n = $xx + $nn if ($nn<0) ; ## -1 is last column
  die "bad col $n :: $line\n" unless (defined $XX[$n]) ;
  print "$XX[$n]" ;
  print " " unless ($x == $ncc) ;
 }
 print "\n" ;
}
char **argv) ; 20 | 21 | int main(int argc, char **argv) 22 | { 23 | 24 | #define MAXSTR 512 25 | #define MAXFF 10 26 | #define MAXNUM 60000 27 | #define MAXBLOCKS 10000 28 | #define BSIZE 5000000 29 | 30 | char line[MAXSTR] ; 31 | int lastc, lastb ; 32 | char *sx ; 33 | char *hpos ; 34 | char *spt[MAXFF] ; 35 | int slo = 0, shi = 19, klo = 20, khi = 29, clo = 30, chi = 49 ; 36 | int nsplit , n, t, cc ; 37 | FILE *fff ; 38 | 39 | int chrom, pos, bnum ; 40 | 41 | double wjack[MAXBLOCKS] ; 42 | double jmean[MAXBLOCKS] ; 43 | double jwt[MAXBLOCKS] ; 44 | 45 | int numblocks = 0, block ; 46 | int num, k, nline, x, xmax ; 47 | double wt, val, twt ; 48 | double y1, y2, est, sig, mean, tnum, tden ; 49 | double **xx, *xvals, *xwt ; 50 | int *xind ; 51 | 52 | readcommands(argc, argv) ; 53 | /** 54 | iname has 3 columns 55 | block # weight jackknife value. 56 | First row (block 0) is global mean 57 | */ 58 | 59 | openit(iname, &fff, "r") ; 60 | 61 | nline = numlines(iname) ; 62 | xx = initarray_2Ddouble(3, nline, 0) ; 63 | bnum = nline = getxx(xx, nline, 3, iname) ; 64 | 65 | ZALLOC(xwt, nline+1, double) ; 66 | ZALLOC(xvals, nline+1, double) ; 67 | ZALLOC(xind, nline+1, int) ; 68 | 69 | xind[0] = -1 ; 70 | 71 | if (xmean > -1.0e15) { 72 | xwt[0] = 0 ; 73 | mean = xvals[0] = xmean ; 74 | xind[0] = 0 ; 75 | } 76 | for (k=0; knline)) { 79 | fatalx("bad index %d %d\n", x, nline) ; 80 | } 81 | xind[x] = x ; 82 | xwt[x] = xx[1][k] ; 83 | xvals[x] = xx[2][k] ; 84 | xmax = x ; 85 | } 86 | 87 | t = xind[0] ; 88 | if (t!=0) fatalx("bad global row %d\n", t) ; 89 | 90 | mean = xvals[0] ; 91 | 92 | for (k=1; k<=xmax; ++k) { 93 | jmean[k-1] = xvals[k] ; 94 | jwt[k-1] = xwt[k] ; 95 | } 96 | 97 | 98 | weightjack(&est, &sig, mean, jmean, jwt, xmax) ; 99 | 100 | if (vflag == NO) { 101 | printf("## simpjack2: %s ", iname) ; 102 | printf("%12.6f %12.6f %12.6f %9.3f", est, mean, sig, est/sig) ; 103 | printnl() ; 104 | } 105 | 106 | else { 107 | printf("mean diff(a-b): %12.6f\n", mean) ; 108 | 
printf("Jackknife mean est: %12.6f\n", est) ; 109 | printf("std. err. %12.6f\n", sig) ; 110 | printf("Z: %9.3f\n", est/(sig+1.0e-20)) ; 111 | } 112 | 113 | return 0 ; 114 | } 115 | 116 | 117 | void readcommands(int argc, char **argv) 118 | 119 | { 120 | int i,haploid=0; 121 | 122 | while ((i = getopt (argc, argv, "i:m:v")) != -1) { 123 | 124 | switch (i) 125 | { 126 | 127 | case 'i': 128 | iname = strdup(optarg) ; 129 | break; 130 | 131 | case 'm': 132 | xmean = atof(optarg) ; 133 | break; 134 | 135 | case 'v': 136 | vflag = YES ; 137 | break; 138 | 139 | } 140 | 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/test/H.geno: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 0 4 | 0 5 | 1 6 | 1 7 | 1 8 | 1 9 | 2 10 | 9 11 | 9 12 | 1 13 | 2 14 | 1 15 | 1 16 | 2 17 | 9 18 | -------------------------------------------------------------------------------- /src/test/H.ind: -------------------------------------------------------------------------------- 1 | H M zz 2 | -------------------------------------------------------------------------------- /src/test/J.geno: -------------------------------------------------------------------------------- 1 | 2 2 | 1 3 | 2 4 | 1 5 | 1 6 | 1 7 | 9 8 | 0 9 | 9 10 | 9 11 | 9 12 | 9 13 | 2 14 | 1 15 | 2 16 | 1 17 | 9 18 | -------------------------------------------------------------------------------- /src/test/J.ind: -------------------------------------------------------------------------------- 1 | J U LM 2 | -------------------------------------------------------------------------------- /src/test/K.geno: -------------------------------------------------------------------------------- 1 | 9 2 | 9 3 | 9 4 | 9 5 | 9 6 | 9 7 | 9 8 | 9 9 | 2 10 | 9 11 | 9 12 | 9 13 | 9 14 | 9 15 | 9 16 | 2 17 | 9 18 | -------------------------------------------------------------------------------- /src/test/K.ind: 
-------------------------------------------------------------------------------- 1 | K M ex 2 | -------------------------------------------------------------------------------- /src/test/ignore/expected.geno: -------------------------------------------------------------------------------- 1 | 02 2 | 01 3 | 02 4 | 01 5 | 11 6 | 11 7 | 19 8 | 10 9 | 29 10 | 99 11 | 99 12 | 19 13 | 22 14 | 11 15 | 12 16 | 21 17 | 99 18 | -------------------------------------------------------------------------------- /src/test/ignore/expected.ind: -------------------------------------------------------------------------------- 1 | H M zz 2 | J U LM 3 | -------------------------------------------------------------------------------- /src/test/ignore/start.geno: -------------------------------------------------------------------------------- 1 | 0101290 2 | 0002190 3 | 2002290 4 | 1002190 5 | 1011191 6 | 2212192 7 | 9219991 8 | 0210092 9 | 0920921 10 | 1190992 11 | 2292992 12 | 1210991 13 | 1120299 14 | 1110190 15 | 0210299 16 | 9920122 17 | 1299999 18 | -------------------------------------------------------------------------------- /src/test/ignore/start.ind: -------------------------------------------------------------------------------- 1 | F F Ignore 2 | G F Ignore 3 | H M zz 4 | I U Ignore 5 | J U LM 6 | K M Ignore 7 | L F Ignore 8 | -------------------------------------------------------------------------------- /src/test/ignore/start.snp: -------------------------------------------------------------------------------- 1 | 1_9484678 1 0.185784 9484678 A T 2 | rs376694687 1 2.371394 227538701 A C 3 | rs13403822 2 1.811318 172903124 T C 4 | rs16834024 3 1.344171 122872153 G A 5 | rs72674783 4 1.299763 125143064 A C 6 | rs58422659 5 1.394293 136197950 C A 7 | 6_134035567 6 1.366794 134035567 T G 8 | rs12534938 7 1.809543 155765548 C T 9 | rs10961597 9 0.321297 14584627 G T 10 | rs77431051 10 1.123419 89473434 C T 11 | rs2159354 12 0.120725 4824503 G C 12 | rs148724525 13 0.841040 
91169638 T C 13 | rs72758108 15 1.121528 95408319 G A 14 | rs114826028 18 0.413575 19641425 G A 15 | rs59820829 21 0.252528 28173544 C T 16 | rs143560832 23 0.859405 78419160 A G 17 | rs112571135 24 0 13491583 T C 18 | -------------------------------------------------------------------------------- /src/test/test1.geno: -------------------------------------------------------------------------------- 1 | 01290 2 | 00129 3 | 90012 4 | 29001 5 | 12900 6 | 00000 7 | 11111 8 | 22222 9 | 99999 10 | 92100 11 | 09210 12 | 00921 13 | 10092 14 | 21009 15 | 00000 16 | 21101 17 | 22222 18 | -------------------------------------------------------------------------------- /src/test/test1.ind: -------------------------------------------------------------------------------- 1 | A M AG 2 | B F BG 3 | C U zz 4 | D M zz 5 | E F zz 6 | -------------------------------------------------------------------------------- /src/test/test1.snp: -------------------------------------------------------------------------------- 1 | 1_9484678 1 0.185784 9484678 A T 2 | rs376694687 1 2.371394 227538701 A C 3 | rs13403822 2 1.811318 172903124 T C 4 | rs16834024 3 1.344171 122872153 G A 5 | rs72674783 4 1.299763 125143064 A C 6 | rs58422659 5 1.394293 136197950 C A 7 | 6_134035567 6 1.366794 134035567 T G 8 | rs12534938 7 1.809543 155765548 C T 9 | rs10961597 9 0.321297 14584627 G T 10 | rs77431051 10 1.123419 89473434 C T 11 | rs2159354 12 0.120725 4824503 G C 12 | rs148724525 13 0.841040 91169638 T C 13 | rs72758108 15 1.121528 95408319 G A 14 | rs114826028 18 0.413575 19641425 G A 15 | rs59820829 21 0.252528 28173544 C T 16 | rs143560832 23 0.859405 78419160 A G 17 | rs112571135 24 0 13491583 T C 18 | -------------------------------------------------------------------------------- /src/test/test12.geno: -------------------------------------------------------------------------------- 1 | 012900101290 2 | 001290002190 3 | 900122002290 4 | 290011002190 5 | 129001011191 6 | 000002212192 7 | 
111119219991 8 | 222220210092 9 | 999990920921 10 | 921001190992 11 | 092102292992 12 | 009211210991 13 | 100921120299 14 | 210091110190 15 | 000000210299 16 | 211019920122 17 | 222221299999 18 | -------------------------------------------------------------------------------- /src/test/test12.ind: -------------------------------------------------------------------------------- 1 | A M AG 2 | B F BG 3 | C U zz 4 | D M zz 5 | E F zz 6 | F F qq 7 | G F QQ 8 | H M zz 9 | I U BG 10 | J U LM 11 | K M ex 12 | L F LL 13 | -------------------------------------------------------------------------------- /src/test/test123.geno: -------------------------------------------------------------------------------- 1 | 0129001012900000 2 | 0012900021900129 3 | 9001220022909210 4 | 2900110021900291 5 | 1290010111911100 6 | 0000022121920101 7 | 1111192199919999 8 | 2222202100922121 9 | 9999909209211119 10 | 9210011909929002 11 | 0921022929922012 12 | 0092112109912011 13 | 1009211202992009 14 | 2100911101901111 15 | 0000002102999911 16 | 2110199201222012 17 | 2222212999999001 18 | -------------------------------------------------------------------------------- /src/test/test123.ind: -------------------------------------------------------------------------------- 1 | A M AG 2 | B F BG 3 | C U zz 4 | D M zz 5 | E F zz 6 | F F qq 7 | G F QQ 8 | H M zz 9 | I U BG 10 | J U LM 11 | K M ex 12 | L F LL 13 | M F OP 14 | N M TG 15 | O U EM 16 | P F MA 17 | -------------------------------------------------------------------------------- /src/test/test123.snp: -------------------------------------------------------------------------------- 1 | 1_9484678 1 0.185784 9484678 A T 2 | rs376694687 1 2.371394 227538701 A C 3 | rs13403822 2 1.811318 172903124 T C 4 | rs16834024 3 1.344171 122872153 G A 5 | rs72674783 4 1.299763 125143064 A C 6 | rs58422659 5 1.394293 136197950 C A 7 | 6_134035567 6 1.366794 134035567 T G 8 | rs12534938 7 1.809543 155765548 C T 9 | rs10961597 9 0.321297 14584627 G T 10 | 
rs77431051 10 1.123419 89473434 C T 11 | rs2159354 12 0.120725 4824503 G C 12 | rs148724525 13 0.841040 91169638 T C 13 | rs72758108 15 1.121528 95408319 G A 14 | rs114826028 18 0.413575 19641425 G A 15 | rs59820829 21 0.252528 28173544 C T 16 | rs143560832 23 0.859405 78419160 A G 17 | rs112571135 24 0 13491583 T C 18 | -------------------------------------------------------------------------------- /src/test/test13.geno: -------------------------------------------------------------------------------- 1 | 012900000 2 | 001290129 3 | 900129210 4 | 290010291 5 | 129001100 6 | 000000101 7 | 111119999 8 | 222222121 9 | 999991119 10 | 921009002 11 | 092102012 12 | 009212011 13 | 100922009 14 | 210091111 15 | 000009911 16 | 211012012 17 | 222229001 18 | -------------------------------------------------------------------------------- /src/test/test13.ind: -------------------------------------------------------------------------------- 1 | A M AG 2 | B F BG 3 | C U zz 4 | D M zz 5 | E F zz 6 | M F OP 7 | N M TG 8 | O U EM 9 | P F MA 10 | -------------------------------------------------------------------------------- /src/test/test2.geno: -------------------------------------------------------------------------------- 1 | 0101290 2 | 0002190 3 | 2002290 4 | 1002190 5 | 1011191 6 | 2212192 7 | 9219991 8 | 0210092 9 | 0920921 10 | 1190992 11 | 2292992 12 | 1210991 13 | 1120299 14 | 1110190 15 | 0210299 16 | 9920122 17 | 1299999 18 | -------------------------------------------------------------------------------- /src/test/test2.ind: -------------------------------------------------------------------------------- 1 | F F qq 2 | G F QQ 3 | H M zz 4 | I U BG 5 | J U LM 6 | K M ex 7 | L F LL 8 | -------------------------------------------------------------------------------- /src/test/test2.snp: -------------------------------------------------------------------------------- 1 | 1_9484678 1 0.185784 9484678 A T 2 | rs376694687 1 2.371394 227538701 A C 3 | rs13403822 2 1.811318 
172903124 T C 4 | rs16834024 3 1.344171 122872153 G A 5 | rs72674783 4 1.299763 125143064 A C 6 | rs58422659 5 1.394293 136197950 C A 7 | 6_134035567 6 1.366794 134035567 T G 8 | rs12534938 7 1.809543 155765548 C T 9 | rs10961597 9 0.321297 14584627 G T 10 | rs77431051 10 1.123419 89473434 C T 11 | rs2159354 12 0.120725 4824503 G C 12 | rs148724525 13 0.841040 91169638 T C 13 | rs72758108 15 1.121528 95408319 G A 14 | rs114826028 18 0.413575 19641425 G A 15 | rs59820829 21 0.252528 28173544 C T 16 | rs143560832 23 0.859405 78419160 A G 17 | rs112571135 24 0 13491583 T C 18 | -------------------------------------------------------------------------------- /src/test/test3.geno: -------------------------------------------------------------------------------- 1 | 0000 2 | 0129 3 | 9210 4 | 0291 5 | 1100 6 | 0101 7 | 9999 8 | 2121 9 | 1119 10 | 9002 11 | 2012 12 | 2011 13 | 2009 14 | 1111 15 | 9911 16 | 2012 17 | 9001 18 | -------------------------------------------------------------------------------- /src/test/test3.ind: -------------------------------------------------------------------------------- 1 | M F OP 2 | N M TG 3 | O U EM 4 | P F MA 5 | -------------------------------------------------------------------------------- /src/test/test3.snp: -------------------------------------------------------------------------------- 1 | 1_9484678 1 0.185784 9484678 A T 2 | rs376694687 1 2.371394 227538701 A C 3 | rs13403822 2 1.811318 172903124 T C 4 | rs16834024 3 1.344171 122872153 G A 5 | rs72674783 4 1.299763 125143064 A C 6 | rs58422659 5 1.394293 136197950 C A 7 | 6_134035567 6 1.366794 134035567 T G 8 | rs12534938 7 1.809543 155765548 C T 9 | rs10961597 9 0.321297 14584627 G T 10 | rs77431051 10 1.123419 89473434 C T 11 | rs2159354 12 0.120725 4824503 G C 12 | rs148724525 13 0.841040 91169638 T C 13 | rs72758108 15 1.121528 95408319 G A 14 | rs114826028 18 0.413575 19641425 G A 15 | rs59820829 21 0.252528 28173544 C T 16 | rs143560832 23 0.859405 78419160 A G 17 | 
rs112571135 24 0 13491583 T C 18 | -------------------------------------------------------------------------------- /src/transpose.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "geno.h" 5 | 6 | static const char usage_documentation[] = "Transpose a genotype between a packed ancestry map format and a transpose packed format or vice versa."; 7 | 8 | static struct argp_option options[] = { 9 | {"low-mem", 'm', 0, 0, "Do not allocate memory for the full genotype file. This will substantially reduce memory requirements but will be much slower." }, 10 | { 0 } 11 | }; 12 | 13 | static char args_doc[] = "input_genotype_file output_genotype_file"; 14 | 15 | struct arguments 16 | { 17 | char *input_file; 18 | char *output_file; 19 | int low_mem; 20 | }; 21 | 22 | static error_t 23 | parse_opt (int key, char *arg, struct argp_state *state) 24 | { 25 | struct arguments *arguments = state->input; 26 | 27 | switch (key){ 28 | case 'm': 29 | arguments->low_mem = 1; 30 | break; 31 | 32 | case ARGP_KEY_ARG: 33 | if (state->arg_num >= 2) // too many arguments 34 | argp_usage (state); 35 | else if (state->arg_num == 0) 36 | arguments->input_file = arg; 37 | else if (state->arg_num == 1) 38 | arguments->output_file = arg; 39 | break; 40 | 41 | case ARGP_KEY_END: 42 | if(state->arg_num < 2) 43 | argp_usage(state); 44 | break; 45 | 46 | default: 47 | return ARGP_ERR_UNKNOWN; 48 | } 49 | return 0; 50 | } 51 | 52 | /* Our argp parser. 
*/ 53 | static struct argp argp = { options, parse_opt, args_doc, usage_documentation }; 54 | 55 | int main(int argc, char **argv){ 56 | struct arguments arguments; 57 | 58 | // default 59 | arguments.low_mem = 0; 60 | 61 | argp_parse (&argp, argc, argv, 0, 0, &arguments); 62 | 63 | genotype_matrix geno; 64 | 65 | printf("reading %s\n", arguments.input_file); 66 | genotype_matrix_initialize(&geno); 67 | int result; 68 | if (arguments.low_mem){ 69 | result = genotype_matrix_read_file(&geno, arguments.input_file, 0, NULL, NULL); 70 | } else{ 71 | result = genotype_matrix_read_file_full(&geno, arguments.input_file, 0, NULL, NULL); 72 | } 73 | if (result < 0){ 74 | printf("Error %d", result); 75 | return result; 76 | } 77 | 78 | genotype_matrix_write_packed_file(&geno, arguments.output_file, !geno.transpose); 79 | 80 | genotype_matrix_destructor(&geno); 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /src/wtjack.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use Getopt::Std ; 3 | 4 | getopts('i:m:b:',\%opts) ; 5 | 6 | $BIN ="/groups/reich/pm82/dev/ADMIXTOOLS/bin"; 7 | 8 | # filename of rolloff output file, in parfile output: 9 | if (defined $opts{"i"}) { 10 | $pop = $opts{"i"} ; 11 | } 12 | else { 13 | die "input parameter compulsory\n"; 14 | } 15 | 16 | # SNP file 17 | if (defined $opts{"m"}) { 18 | $map = $opts{"m"} ; 19 | } 20 | else { 21 | die "m parameter compulsory- Enter map file name\n" ; 22 | } 23 | 24 | if (defined $opts{"b"}) { 25 | $bin = $opts{"b"} ; 26 | } 27 | else { 28 | die "path to bin directory required\n"; 29 | } 30 | 31 | 32 | open (IN, "$map") or die "COF mapfile\n"; 33 | @count = (); 34 | $count_total =0; 35 | 36 | #count the number of SNPs on each chromosome. 
This is used as weights for the weighted jackknife calculation 37 | while (my $line = ) 38 | { 39 | chomp $line; 40 | @data =split(" ",$line); 41 | for (my $i = 1; $i <= 22; $i++) 42 | { 43 | if ($data[1] == $i) 44 | { 45 | $count[$i]++; 46 | $count_total++; 47 | } 48 | } 49 | } 50 | 51 | # open expfit output 52 | $log = "expfit_${pop}.log"; 53 | open (LOG, $log) or die "cannot open $log\n"; 54 | @log = ; 55 | 56 | $flog = "expfit_${pop}.flog"; 57 | open (FLOG, $flog) or die "cannot open $flog\n"; 58 | 59 | # open output file 60 | open (JIN, ">${pop}.jin") or die "COF outfile\n"; 61 | 62 | $chr = 1; 63 | for (my $i = 0; $i < scalar(@log); $i++) 64 | { 65 | $line = $log[$i]; 66 | my $string = "${pop}:${chr}"; 67 | unless ($line =~ /$string/) {next;} 68 | $start[$chr] = $i; 69 | $chr++; 70 | } 71 | push(@start, scalar(@log)); 72 | 73 | printf JIN "%3s %12s %12s\n", CHR, SNPs, Estimated_date; 74 | for my $chr(1..22) 75 | { 76 | for (my $i = $start[$chr]; $i < $start[$chr+1]; $i++) 77 | { 78 | my $line = $log[$i]; 79 | if ($line =~ /mean/) { 80 | chomp $line; 81 | @array = split(/[\t ]+/,$line); 82 | $this = $array[2]; 83 | printf JIN "%3d %12.6f %12.6f\n", $chr, $count[$chr], $this; 84 | 85 | } 86 | } 87 | } 88 | 89 | # Find mean for the full run 90 | while (my $line = ) 91 | { 92 | if ($line =~ /mean/) { 93 | chomp $line; 94 | @array = split(/[\t ]+/,$line); 95 | $mean = $array[2]; 96 | } 97 | } 98 | #print "mean: $mean\n"; 99 | # Run jackknife 100 | $jin = $pop.".jin"; 101 | $jout = $pop.".jout"; 102 | print "\nJackknife summary: ${pop}.jin\n"; 103 | system "$bin/dowtjack -i $jin -m $mean -o $jout"; 104 | $ans = `tail $jout` ; 105 | ($jmean, $serr) = split " ", $ans ; 106 | 107 | printf "Jackknife mean: %9.3f\n", $jmean ; 108 | printf "Jackknife std. 
err: %9.3f", $serr ; 109 | print "\n" ; 110 | -------------------------------------------------------------------------------- /src/xpsubs.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | #include 5 | #include 6 | #include "admutils.h" 7 | 8 | extern int verbose ; 9 | 10 | double xpest(double **gg, int *gobs, int *na, int *nb, 11 | int neq, double *ppa, double *ppb) ; 12 | 13 | void mk2from3ml(double *xd, double *xc, double p, double pp) ; 14 | void mk2from2(double *xd, double *xc ) ; 15 | 16 | void loadpprob(double *pprob, double pa, double pb) ; 17 | void gen3(double *ww, double a, double b) ; 18 | 19 | double xpest2like(double **gg, int *gobs, int *na, int *nb, 20 | int *iscasearr, 21 | int neq, double ppa, double ppb, double risk) ; 22 | 23 | 24 | // clean up SANS when finalized 25 | 26 | typedef struct { 27 | SNP *cupt ; 28 | int numsamps ; 29 | double admbayessc ; 30 | double *baymodelsc ; 31 | double admfsc ; 32 | double admzscore ; 33 | double admbsc ; 34 | double admfccsc ; 35 | double simpsc[2] ; 36 | double admyl[3] ; 37 | double lrmax ; 38 | double lrsig ; 39 | double maxlod ; 40 | /* now start of fine-mapping scores */ 41 | double gscore ; 42 | double gcheck ; 43 | double gbayes ; 44 | } SANS ; 45 | 46 | #ifdef __cplusplus 47 | } 48 | #endif 49 | -------------------------------------------------------------------------------- /transpose_info/fxtdoc/README: -------------------------------------------------------------------------------- 1 | fxtract -p par2 ## extracts samples by population label 2 | fxtract -p par_fxtract #@ extracts samples by individual name 3 | -------------------------------------------------------------------------------- /transpose_info/fxtdoc/fxtract.info: -------------------------------------------------------------------------------- 1 | fxtract See ~np29/fxtdoc for example 2 | fxtract -p [-V] 3 | 4 | ### sample parfile 5 | ### 
~np29//pullsource/tgsrc/testconvertf/fxtdir 6 | DD: /n/groups/reich/DAVID_RELEASE/V58/V58.0 7 | instem: DD/v58_1240k_all 8 | outstem: testz2 9 | poplistname: chb 10 | ##tempdir: junkdir 11 | 12 | OR 13 | 14 | instem: cdx 15 | outstem: cdfxp 16 | ##poplistname: pjunk 17 | indivlistname: p1list 18 | ## list of individual IDs 1 per line 19 | tempdir: junkd 20 | 21 | ### 22 | A wrapper for Matt Mah's fast extract scripts. 23 | fxtract does a very simple job -- extract samples from a 24 | TGENO dataset to make a new TGENO dataset with fewer samples 25 | 26 | 27 | If you run fxtract then ~np29/o2bin should be in your path. 28 | 29 | 30 | ######################################################### 31 | This exact parfile as above will also work for convertf 32 | *** new convertf feature *** 33 | --- instem and outstem -- stem for .ind .snp .geno files 34 | fxtract is a bare bones script -- a driver for Matt Mah's fast extract on tgeno files 35 | 36 | If you want fancy features of convertf or to make plink files first run fxtract and 37 | then convertf on the output. 38 | 39 | tempdir: usage is a little complicated but intended to be flexible and convenient 40 | If you do nothing, then a scratch working directory is used in /tmp 41 | This is not ideal as on O2 /tmp is sometimes full. 
42 | If you set an environment variable $STMP then a scratch directory 43 | is used under that (and then deleted) 44 | I have STMP=/n/scratch/users/n/np29 45 | Alternatively a working directory can be specified explicitly by tempdir: 46 | 47 | Nick 3/13/24 48 | 49 | 50 | -------------------------------------------------------------------------------- /transpose_info/fxtdoc/par2: -------------------------------------------------------------------------------- 1 | DD: ../../convertf 2 | instem: DD/example 3 | outstem: exout3 4 | poplistname: control 5 | 6 | 7 | -------------------------------------------------------------------------------- /transpose_info/fxtdoc/par_fxtract: -------------------------------------------------------------------------------- 1 | DD: ../../convertf 2 | instem: DD/example 3 | outstem: exout 4 | indivlistname: tlist 5 | 6 | 7 | -------------------------------------------------------------------------------- /transpose_info/fxtdoc/tlist: -------------------------------------------------------------------------------- 1 | SAMPLE1 2 | SAMPLE3 3 | --------------------------------------------------------------------------------