├── .DS_Store ├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE.txt ├── NAMESPACE ├── R ├── beeswarm.plot.R ├── coalescent.sim.R ├── coalescent.tree.sim.R ├── data.R ├── fitch.R ├── fwd.coalescent.sim.R ├── fwd.phen.sim.R ├── fwd.plot.prob.phen.R ├── fwd.snp.sim.R ├── get.sig.snps.R ├── heatmap.DNAbin.R ├── pair.tests.R ├── phen.sim.R ├── plot.phen.R ├── plot.sig.snps.R ├── readCFML.R ├── reconstruct.R ├── simTest.R ├── simultaneous.test.R ├── simultaneous.test.epi.R ├── snp.sim.Q.R ├── snp.sim.Q_old.R ├── snp.sim.R ├── subsequent.test.R ├── terminal.test.R ├── terminal.test.epi.R ├── tree.reconstruct.R ├── treeWAS.R └── utils.R ├── README.md ├── data ├── dist_0.01.rda ├── dist_0.05.rda ├── dist_0.1.rda ├── dist_0.2.rda ├── dist_0.25.rda ├── dist_0.rda ├── phen.cont.rank.rda ├── phen.cont.rda ├── phen.plot.col.rda ├── phen.rda ├── phen.reconstruction.rda ├── snps.assoc.rda ├── snps.rda ├── snps.reconstruction.rda ├── tree.rda └── treeWAS.example.out.rda ├── inst └── CITATION ├── man ├── asr.Rd ├── assoc.test.Rd ├── beeswarmPlot.Rd ├── coalescent.sim.Rd ├── coalescent.tree.sim.Rd ├── dist_0.01.Rd ├── dist_0.05.Rd ├── dist_0.1.Rd ├── dist_0.2.Rd ├── dist_0.25.Rd ├── dist_0.Rd ├── fwd.coalescent.sim.Rd ├── fwd.phen.sim.Rd ├── fwd.snp.sim.Rd ├── get.ancestral.pars.Rd ├── get.assoc.scores.Rd ├── get.binary.snps.Rd ├── get.fitch.n.mts.Rd ├── get.original.loci.Rd ├── get.score3.Rd ├── get.sig.snps.Rd ├── get.tip.order.Rd ├── get.unique.matrix.Rd ├── ggplotbg.Rd ├── heatmap.DNAbin.Rd ├── keepFirstN.Rd ├── keepLastN.Rd ├── manhattan.plot.Rd ├── memfree.Rd ├── pair.tests.Rd ├── phen.Rd ├── phen.cont.Rd ├── phen.cont.rank.Rd ├── phen.plot.col.Rd ├── phen.reconstruction.Rd ├── phen.sim.Rd ├── plot_phen.Rd ├── plot_prob_phen.Rd ├── plot_sig_snps.Rd ├── print.treeWAS.Rd ├── read.CFML.Rd ├── removeFirstN.Rd ├── removeLastN.Rd ├── selectBiallelicSNP.Rd ├── set.args.Rd ├── simTest.Rd ├── simultaneous.test.Rd ├── simultaneous.test.epi.Rd ├── snp.sim.Q.Rd ├── snp.sim.Rd 
├── snps.Rd ├── snps.assoc.Rd ├── snps.reconstruction.Rd ├── subsequent.test.Rd ├── table.matrix.Rd ├── terminal.test.Rd ├── terminal.test.epi.Rd ├── tree.Rd ├── tree.reconstruct.Rd ├── treeWAS.Rd ├── treeWAS.example.out.Rd └── write.treeWAS.Rd ├── treeWAS.Rproj └── vignettes ├── .DS_Store ├── figs ├── .DS_Store ├── Eqn_Legend_genotype.JPG ├── Eqn_Legend_genotype.pdf ├── Eqn_Legend_genotype.png ├── plot_hist_phen.pdf ├── plot_hist_phen.png ├── plot_hist_phen_rank.pdf ├── plot_hist_phen_rank.png ├── plot_hist_simultaneous.pdf ├── plot_hist_simultaneous.png ├── plot_hist_subsequent.pdf ├── plot_hist_subsequent.png ├── plot_hist_terminal.pdf ├── plot_hist_terminal.png ├── plot_manhattan_simultaneous.pdf ├── plot_manhattan_simultaneous.png ├── plot_manhattan_subsequent.pdf ├── plot_manhattan_subsequent.png ├── plot_manhattan_terminal.pdf ├── plot_manhattan_terminal.png ├── plot_tree.pdf ├── plot_tree.png ├── plot_tree_parsimony.pdf ├── plot_tree_parsimony.png ├── tree_phen_eg.pdf └── tree_phen_eg.png ├── old ├── README_10_07_2017.md ├── ace.tree.cont.IC.pdf ├── ace.tree.cont.pdf ├── ace_example.R ├── ace_example_phen_R_0.Rdata ├── figsunnamed-chunk-12-1.pdf ├── figsunnamed-chunk-13-1.pdf ├── figsunnamed-chunk-14-1.pdf ├── figsunnamed-chunk-15-1.pdf ├── figsunnamed-chunk-16-1.pdf ├── figsunnamed-chunk-17-1.pdf ├── figsunnamed-chunk-7-1.pdf ├── pagel_example.R ├── phen_cont_skewed.Rdata ├── phen_cont_skewed_rank.Rdata ├── score3_output_example.R ├── treeWAS Vignette.pdf ├── treeWAS_example.R └── treeWAS_vignette_files │ └── figure-markdown_strict │ ├── unnamed-chunk-10-1.png │ ├── unnamed-chunk-12-1.png │ ├── unnamed-chunk-19-1.png │ ├── unnamed-chunk-20-1.png │ ├── unnamed-chunk-21-1.png │ ├── unnamed-chunk-22-1.png │ ├── unnamed-chunk-23-1.png │ ├── unnamed-chunk-24-1.png │ ├── unnamed-chunk-5-1.png │ └── unnamed-chunk-8-1.png ├── treeWAS Vignette_files └── MathJax.js ├── treeWAS_vignette.Rmd └── treeWAS_vignette.html /.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/.DS_Store -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^doc$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | .Rproj.user 4 | .Rhistory 5 | .RData 6 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: treeWAS 2 | Title: Phylogenetic tree-based microbial GWAS 3 | Version: 1.0 4 | Authors@R: c(person("Caitlin", "Collins", email="caitiecollins@gmail.com", role = c("aut", "cre")),person("Xavier", "Didelot",email = "xavier.didelot@gmail.com",role = c("aut"))) 5 | Maintainer: Caitlin Collins 6 | Description: Perform microbial GWAS using phylogenetic trees to correct for population structure and recombination. 
7 | Year: 2018 8 | License: GPL (>=2) 9 | LazyData: true 10 | RoxygenNote: 7.2.3 11 | Depends: R (>= 3.0.0), 12 | adegenet, 13 | ape 14 | Imports: ade4, 15 | beeswarm, 16 | ggplot2, 17 | graphics, 18 | grid, 19 | Hmisc, 20 | knitr, 21 | phangorn, 22 | phytools, 23 | png, 24 | pryr, 25 | rmarkdown, 26 | scales, 27 | stats 28 | VignetteBuilder: knitr 29 | Encoding: UTF-8 30 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | treeWAS: A Phylogenetic Tree-Based Tool for Genome-Wide Association Studies in Microbes 2 | Copyright (C) 2017 Caitlin Collins 3 | 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <https://www.gnu.org/licenses/>. 
17 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,treeWAS) 4 | export(.getFixed) 5 | export(.is.even) 6 | export(.is.integer0) 7 | export(.is.odd) 8 | export(.substrLeft) 9 | export(.substrRight) 10 | export(.switch.phen) 11 | export(asr) 12 | export(assoc.test) 13 | export(beeswarmPlot) 14 | export(coalescent.sim) 15 | export(coalescent.tree.sim) 16 | export(fwd.coalescent.sim) 17 | export(fwd.phen.sim) 18 | export(fwd.snp.sim) 19 | export(get.ancestral.pars) 20 | export(get.assoc.scores) 21 | export(get.binary.snps) 22 | export(get.fitch.n.mts) 23 | export(get.original.loci) 24 | export(get.score3) 25 | export(get.sig.snps) 26 | export(get.tip.order) 27 | export(get.unique.matrix) 28 | export(ggplotbg) 29 | export(heatmap.DNAbin) 30 | export(keepFirstN) 31 | export(keepLastN) 32 | export(manhattan.plot) 33 | export(memfree) 34 | export(pair.tests) 35 | export(phen.sim) 36 | export(plot_phen) 37 | export(plot_prob_phen) 38 | export(plot_sig_snps) 39 | export(read.CFML) 40 | export(removeFirstN) 41 | export(removeLastN) 42 | export(selectBiallelicSNP) 43 | export(set.args) 44 | export(simTest) 45 | export(simultaneous.test) 46 | export(simultaneous.test.epi) 47 | export(snp.sim) 48 | export(snp.sim.Q) 49 | export(subsequent.test) 50 | export(table.matrix) 51 | export(terminal.test) 52 | export(terminal.test.epi) 53 | export(tree.reconstruct) 54 | export(treeWAS) 55 | export(write.treeWAS) 56 | import(adegenet) 57 | import(ape, except = zoom) 58 | import(ggplot2) 59 | importFrom(Hmisc,all.is.numeric) 60 | importFrom(ade4,dudi.pca) 61 | importFrom(adegenet,transp) 62 | importFrom(ape,read.dna) 63 | importFrom(beeswarm,beeswarm) 64 | importFrom(grDevices,col2rgb) 65 | importFrom(grDevices,dev.off) 66 | importFrom(grDevices,heat.colors) 67 | importFrom(grDevices,pdf) 68 | 
importFrom(grDevices,rgb) 69 | importFrom(graphics,arrows) 70 | importFrom(graphics,axis) 71 | importFrom(graphics,barplot) 72 | importFrom(graphics,box) 73 | importFrom(graphics,hist) 74 | importFrom(graphics,image) 75 | importFrom(graphics,lines) 76 | importFrom(graphics,mtext) 77 | importFrom(graphics,par) 78 | importFrom(graphics,plot.new) 79 | importFrom(graphics,points) 80 | importFrom(graphics,rect) 81 | importFrom(graphics,text) 82 | importFrom(graphics,title) 83 | importFrom(phangorn,acctran) 84 | importFrom(phangorn,ancestral.pml) 85 | importFrom(phangorn,as.phyDat) 86 | importFrom(phangorn,fitch) 87 | importFrom(phangorn,midpoint) 88 | importFrom(phangorn,pace) 89 | importFrom(phangorn,phyDat) 90 | importFrom(phangorn,pml) 91 | importFrom(phangorn,pratchet) 92 | importFrom(phytools,anc.ML) 93 | importFrom(phytools,fastAnc) 94 | importFrom(pryr,mem_used) 95 | importFrom(pryr,object_size) 96 | importFrom(scales,rescale) 97 | importFrom(stats,anova) 98 | importFrom(stats,as.formula) 99 | importFrom(stats,chisq.test) 100 | importFrom(stats,cor) 101 | importFrom(stats,density) 102 | importFrom(stats,dist) 103 | importFrom(stats,ecdf) 104 | importFrom(stats,fisher.test) 105 | importFrom(stats,ftable) 106 | importFrom(stats,glm) 107 | importFrom(stats,lm) 108 | importFrom(stats,mantelhaen.test) 109 | importFrom(stats,p.adjust) 110 | importFrom(stats,quantile) 111 | importFrom(stats,residuals) 112 | importFrom(stats,rexp) 113 | importFrom(stats,rnorm) 114 | importFrom(stats,rpois) 115 | importFrom(utils,combn) 116 | importFrom(utils,str) 117 | importFrom(utils,write.csv) 118 | importFrom(utils,write.table) 119 | -------------------------------------------------------------------------------- /R/coalescent.tree.sim.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | ######################### 4 | ## coalescent.tree.sim ## 5 | ######################### 6 | 7 | 8 | 
########################################################################

###################
## DOCUMENTATION ##
###################

#' Simulate a coalescent tree.
#'
#' Simulate a binary phylogenetic tree with \code{n.ind} tips under a
#' coalescent model. Working backwards in time, while \code{N} lineages remain,
#' a waiting time is drawn from an exponential distribution with rate
#' \code{N(N-1)/2} and two of the remaining lineages, chosen at random,
#' are merged into a new internal node.
#'
#' @param n.ind An integer (>= 2) specifying the number of terminal nodes desired.
#' @param seed An optional integer controlling the pseudo-random process underlying the tree generation.
#'
#' @return An object of class \code{phylo} with \code{n.ind} tips (labelled \code{1:n.ind})
#' and \code{n.ind - 1} internal nodes, reordered in pruningwise order
#' (and midpoint-rooted if ape does not report it as rooted).
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#' @examples
#'
#' ## basic use of fn
#' tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
#'
#' ## plot output
#' plot(tree)
#'
#' @rawNamespace import(ape, except = zoom)
#' @importFrom phangorn midpoint
#'
#' @export

########################################################################
# @useDynLib phangorn, .registration = TRUE



coalescent.tree.sim <- function(n.ind=100, seed=NULL){

  ## check input: at least two tips are needed for a coalescent event
  ## (n.ind < 2 previously ran the loop over 1:0 and failed obscurely)
  if(!is.numeric(n.ind) || length(n.ind) != 1 || is.na(n.ind) ||
     n.ind < 2 || n.ind != round(n.ind)){
    stop("'n.ind' must be a single integer >= 2.")
  }

  if(!is.null(seed)) set.seed(seed)

  n.nodes <- n.ind + (n.ind-1)          # total n.nodes (internal, external)
  inds <- c(1:n.ind)                    # terminal nodes
  nodes <- rev(c((n.ind+1):n.nodes))    # internal nodes; nodes[i] is created at generation i
  n.events <- length(nodes)             # one coalescent event per internal node

  times <- numeric(n.events)            # waiting time drawn at each generation
  to <- integer(2*n.events)             # downstream node of each edge (2 per event)
  inds.remaining <- inds                # lineages still available to coalesce

  for(i in seq_len(n.events)){

    ## N, the number of lineages remaining at this generation,
    ## from which a random 2 are to be selected for coalescence
    N <- length(inds.remaining)

    ###################
    ## BRANCH LENGTH ##
    ###################
    ## lambda, the rate of the exponential distribution, given N;
    ## then the length of time to coalescence at this generation.
    ## NB: rexp is called before sample, as in the original implementation,
    ## so that a given seed keeps producing the same tree.
    lambda <- (N*(N-1)) / 2
    times[i] <- rexp(n=1, rate=lambda)

    #####################
    ## COALESCENT PAIR ##
    #####################
    ## the 2 lineages to coalesce at this generation
    co.pair <- sample(inds.remaining, 2)
    to[c(2*i-1, 2*i)] <- co.pair

    ## merge these 2 lineages: replace them with the new internal node
    inds.remaining <- c(inds.remaining[!(inds.remaining %in% co.pair)], nodes[i])
  } # end for loop


  ## get edge.list: each internal node is the parent of its co.pair
  from <- rep(nodes, each=2)

  ## get edge lengths
  ## height of the internal node created at generation k = sum(times[1:k])
  node.height <- cumsum(times)
  parent.gen <- match(from, nodes)
  child.gen <- match(to, nodes)        # NA if the downstream node is terminal
  ## terminal nodes sit at height 0; internal downstream nodes must have the
  ## time between them and the final generation subtracted.
  child.height <- ifelse(is.na(child.gen), 0, node.height[child.gen])
  edge.lengths <- node.height[parent.gen] - child.height

  ## put output into tree list (phylo format):
  tree <- list()
  tree$edge <- unname(cbind(from, to))
  tree$tip.label <- c(1:n.ind)
  tree$edge.length <- edge.lengths
  tree$Nnode <- as.integer(n.ind - 1)

  ## change class by force
  class(tree) <- "phylo"
  ## return tree in pruningwise order:
  tree <- reorder.phylo(tree, order="pruningwise")
  ## root tree:
  if(!is.rooted(tree)) tree <- midpoint(tree)

  return(tree)

} # end coalescent.tree.sim

## --------------------------------------------------------------------------------
## /R/fitch.R:
## --------------------------------------------------------------------------------

#####################
## get.fitch.n.mts ##
#####################
## phangorn-based fitch fn

########################################################################

###################
## DOCUMENTATION ##
###################

#' Calculate parsimony scores.
#'
#' Determine parsimony scores for all genetic loci, or a phenotypic variable, along a given tree.
#' An extension of the fitch function available in package phangorn.
#'
#' @param x A numeric matrix or vector containing two unique values with row.names matching tree tip.labels.
#' @param tree A phylo object.
#' @param snps Deprecated: the former name of argument \code{x}.
#' Only used if \code{x} is not supplied (or is \code{NULL}).
#'
#' @return A vector containing one parsimony score per column of \code{x}
#' (or a single score if \code{x} is a vector), as computed by \code{phangorn::fitch}.
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#'
#' @examples
#' \dontrun{
#'
#' ## generate a tree
#' tree <- ape::rtree(100)
#' ## generate snps, a matrix of 0s and 1s
#' snps <- matrix(sample(c(0,1),100000,TRUE), nrow=100)
#' row.names(snps) <- tree$tip.label
#'
#' ## run function
#' out <- get.fitch.n.mts(x=snps, tree)
#'
#' ## examine output
#' str(out)
#' table(out)
#' hist(out)
#' }
#'
#' @importFrom phangorn fitch
#' @importFrom phangorn as.phyDat
#'
#' @export

########################################################################
# @useDynLib phangorn, .registration = TRUE


get.fitch.n.mts <- function(x, tree, snps=NULL){

  ## load packages
  # require(phangorn)

  ## Re-coding snps as x (to allow for phen/vectors).
  ## --> snps now deprecated, but still honoured if x is missing or NULL:
  X <- NULL
  if(!missing(x) && !is.null(x)){
    X <- x
    if(!is.null(snps)){
      warning("As 'x' is specified, we ignore the 'snps' argument. \n
              (In get.fitch.n.mts the 'snps' argument has now been replaced by an argument named 'x'.)")
    }
  }else if(!is.null(snps)){
    X <- snps
  }
  ## If ONE of x or snps was specified, continue; else, stop:
  if(is.null(X)) stop("'x' must be specified.")
  x <- X

  ## checks
  ## do not include NA as a level:
  levs <- unique(as.vector(x[!is.na(x)]))
  if((!is.numeric(x) && !is.logical(x)) || length(levs) != 2){
    stop("x must be a numeric matrix or vector, with two unique values, excluding NAs
         (though we recommend that NAs be in the minority for each column).\n")
  }

  ## Report NA-heavy input.
  ## NB: this must test x itself: levs has already had its NAs removed,
  ## so testing levs (as was done previously) could never detect missing data.
  if(any(is.na(x))){
    if(is.matrix(x)){
      ## proportion of NAs in each column:
      nnas <- colMeans(is.na(x))
      toRemove <- which(nnas > 0.5)
      if(length(toRemove) > 0){
        cat(length(toRemove), "snps columns are over 50% NAs.
            You may want to remove these columns as they are unlikely to be significant
            and can generate inappropriate inferences during ancestral state reconstruction.\n")
      }
    }else{
      nnas <- mean(is.na(x))
      if(nnas > 0.5){
        cat("x is over 50% NAs.
            This may generate inappropriate inferences during ancestral state reconstruction.\n")
      }
    }
  }

  x.levels <- sort(levs, na.last = TRUE)
  ## returns only unique patterns...
  ## *use levels=states (eg. c(0,1)), but keep NAs in x and use ambiguity=NA
  ## to allow NAs without counting them twd parsimony score values.
  x.phyDat <- phangorn::as.phyDat(as.matrix(x),
                                  type="USER", levels=x.levels, ambiguity=NA)
  ## get index of all original x columns to map to unique pattern
  index <- attr(x.phyDat, "index")

  ## get parsimony score for all unique patterns in x
  ## NB: For phangorn::fitch, x data must be of class phyDat
  fitch.unique <- phangorn::fitch(tree, x.phyDat, site="site")
  # table(fitch.unique)

  ## get score for all original sites
  fitch.complete <- fitch.unique[index]
  return(fitch.complete)
} # end get.fitch.n.mts


## --------------------------------------------------------------------------------
## /R/fwd.coalescent.sim.R:
## --------------------------------------------------------------------------------

########################
## fwd.coalescent.sim ##
########################

## a function for simulating trees under a fully-linked coalescent model.
## optional simulation of a phenotype and phenotypically-associated SNPs is implemented.
## optional use of a distribution to guide the substitution rate of the non-associated SNPs is implemented.

## TO DO:
## 1) (Re-)implement associated SNP randomization procedure...
## want to implement procedures that combine the above options...
## 2) Allow phenotypically-associated SNPs simulation to be optionally guided
## by a user-inputted phenotype for the terminal nodes (--> would need to simulate
## phenotypic substitutions from the terminal nodes UP to the root, the reverse
## of the current procedure...)
## 3) Implement assoc.options (currently using deprecated "all" option without requiring argument,
## but would like to consider implementing alternative "model" option(s))


## ARGUMENTS ##
## NOTE(review): the ARGUMENTS and EXAMPLE notes below refer to coalescent.sim;
## several of these arguments do not exist in fwd.coalescent.sim.
# n.ind <- 10 # n.genomes you want to end up with
# gen.size <- 1000000 # bases
# theta <- gen.size*2 # (if sim.by=="branch")# OR # 1*2 # (if sim.by=="locus")
# biallelic <- TRUE # if TRUE, select ONLY complementary nt; if FALSE,
# select from 3 alternatives (ie. A/C/G/T-current nt)
# seed <- 1 # allow user to control randomization to get reproducible results.
# n.snps.assoc <- 5
# assoc.option <- c("all", "model") # deprecated (only "all" available)
# sim.by <- c("locus", "branch") # deprecated (only "locus" has all current protocols implemented)


## EXAMPLE ##
# out <- coalescent.sim(n.ind=100, gen.size=10000, sim.by="locus",
#                       theta=1*2, dist=NULL,
#                       theta_p=15, phen=NULL,
#                       n.snps.assoc=20, assoc.option="all", assoc.prob=90,
#                       haploid=TRUE, biallelic=TRUE, seed=1,
#                       plot=TRUE, heatmap=FALSE, plot2="UPGMA")

########################################################################

###################
## DOCUMENTATION ##
###################

#' Simulate a tree, genotype and phenotype (forward simulation).
#'
#' Simulate a coalescent tree with \code{\link{coalescent.tree.sim}}, then
#' genetic data along that tree with \code{\link{fwd.snp.sim}}, and finally a
#' phenotype derived from the associated loci with \code{\link{fwd.phen.sim}}.
#'
#' @param n.ind An integer specifying the number of individual genomes to simulate
#' (ie. the number of terminal nodes in the tree).
#' @param n.snps An integer specifying the number of genetic loci to simulate.
#' @param n.subs Either an integer or a vector (containing a distribution) that is
#' used to determine the number of substitutions
#' to occur on the phylogenetic tree for each genetic locus (see details).
#' @param n.snps.assoc An optional integer specifying the number of genetic loci
#' that are associated with the phenotype.
#' @param n.subs.assoc A value passed to the \code{n.subs.assoc} argument of
#' \code{fwd.snp.sim} (see \code{?fwd.snp.sim}).
#' @param p A numeric value passed to \code{fwd.phen.sim}, controlling how the probability
#' of the phenotype depends on the number of associated loci in state 1 (see \code{?fwd.phen.sim}).
#' @param heatmap A logical indicating whether to produce a heatmap of the genetic distance
#' between the simulated genomes of the n.ind individuals.
#' @param reconstruct Either a logical indicating whether to attempt to reconstruct
#' a phylogenetic tree using the simulated genetic data, or one of c("UPGMA", "nj", "ml")
#' to specify that tree reconstruction is desired by one of these three methods
#' (Unweighted Pair Group Method with Arithmetic Mean, Neighbour-Joining, Maximum-Likelihood).
#' @param dist.dna.model A character string specifying the type of model to use in
#' calculating the genetic distance between individual genomes (see ?dist.dna).
#' @param seed An optional integer controlling the pseudo-random process of simulation. Two
#' instances of fwd.coalescent.sim with the same seed and arguments will produce identical output.
#'
#' @details #### n.subs ####
#' If the value of the n.subs parameter is set to an integer, this integer is
#' used as the parameter of a Poisson distribution from which the number of substitutions to
#' occur on the phylogenetic tree is drawn for each of the n.snps simulated genetic loci.
#' If n.subs is a vector containing a distribution, this is used directly (in proportion to n.snps)
#' to define the number of substitutions per site. For example, if n.subs=c(3000, 900, 70, 20, 0, 10)
#' and n.snps=8000, then 6000 simulated sites will undergo exactly
#' one substitution somewhere on the phylogenetic tree, 1800 will undergo two,
#' 140 three, 40 four, 0 five, and 20 six.
#'
#' @return A list with four named elements: \code{snps}, \code{snps.assoc},
#' \code{phen} and \code{tree}.
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#' @export
#'
#' @import adegenet
#' @rawNamespace import(ape, except = zoom)

########################################################################

############
## NOTES: ##
############
## theta_p changed to n.phen.subs (and just n.subs in phen.sim.R)
##
## The documentation previously attached to this function also described
## arguments that are NOT part of its signature. It is retained here for reference:
##
## assoc.prob: An optional integer (> 0, <= 100) specifying the strength of the
##   association between the n.snps.assoc loci and the phenotype.
##   The assoc.prob parameter controls the strength of association through a process analogous to dilution.
##   All n.snps.assoc loci are initially simulated to undergo a substitution
##   every time the phenotype undergoes a substitution (ie. perfect association).
##   The assoc.prob parameter then acts like a dilution factor, removing (100 - assoc.prob)%
##   of the substitutions that occurred during simulation under perfect association.
## n.phen.subs: An integer specifying the expected number of phenotypic
##   substitutions to occur on the phylogenetic tree (through the same process as
##   the n.subs parameter when n.subs is an integer).
## phen: An optional vector containing a phenotype for each of the
##   n.ind individuals if no phenotypic simulation is desired.



fwd.coalescent.sim <- function(n.ind=100,
                               n.snps=10000, n.subs=1,
                               n.snps.assoc=10, n.subs.assoc=15,
                               p=1,
                               heatmap=FALSE, reconstruct=FALSE,
                               dist.dna.model="JC69",
                               seed=1){
  ## load packages:
  # require(adegenet)
  # require(ape)

  ## Only pause between plots if more than one has been requested.
  ## NB: this function has no 'plot' argument; the previous check,
  ## c(plot, heatmap, reconstruct)==TRUE, picked up the graphics function
  ## 'plot' instead. 'reconstruct' may also be a method name (eg. "UPGMA"),
  ## so anything other than FALSE/NULL counts as a request.
  ## (presumably the plots themselves are drawn by fwd.snp.sim -- confirm there.)
  n.plots <- sum(isTRUE(heatmap),
                 !(is.null(reconstruct) || identical(reconstruct, FALSE)))
  if(n.plots > 0){
    old.par <- par(ask = (n.plots > 1))
    ## restore the user's setting when we are done:
    on.exit(par(old.par), add = TRUE)
  }

  ################################
  ## Simulate Phylogenetic Tree ##
  ################################
  tree <- coalescent.tree.sim(n.ind = n.ind, seed = seed)

  ###################
  ## Simulate SNPs ##
  ###################
  snps.list <- fwd.snp.sim(n.snps=n.snps, n.subs=n.subs,
                           n.snps.assoc=n.snps.assoc, n.subs.assoc=n.subs.assoc,
                           tree=tree,
                           heatmap=heatmap, reconstruct=reconstruct,
                           dist.dna.model=dist.dna.model,
                           seed=seed)
  snps <- snps.list$snps
  snps.assoc <- snps.list$snps.assoc

  ########################
  ## Simulate Phenotype ##
  ########################
  ## drop=FALSE keeps a matrix even when there is a single associated locus
  phen <- fwd.phen.sim(snps.assoc=snps[, snps.assoc, drop=FALSE], p=p, tree=tree)

  ################
  ## Get Output ##
  ################
  out <- list(snps, snps.assoc, phen, tree)
  names(out) <- c("snps", "snps.assoc", "phen", "tree")
  return(out)

} # end fwd.coalescent.sim

## --------------------------------------------------------------------------------
## /R/fwd.phen.sim.R:
## --------------------------------------------------------------------------------


##################
## fwd.phen.sim ##
##################

## TO DO ##
## CAREFUL--phen.sim seems not to be working with trees other than those
## produced with your coalescent.tree.sim fn (eg. rtree(100))!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


########################################################################

###################
## DOCUMENTATION ##
###################

#' Simulate a phenotype, from root to tips.
#'
#' [*An exploratory function:*] Having already simulated a genotype,
#' this function allows you to simulate an associated phenotype along the tree, from root to tips.
#'
#' @param snps.assoc A matrix created by the \code{fwd.snp.sim} function,
#' which indicates where genotypic substitutions occur on the tree at phenotypically-associated sites.
#' @param p A numeric value specifying the probability of phenotypic substitution,
#' given genotypic substitution (see details).
#' @param tree A phylo object.
#'
#' @details For each individual (row of \code{snps.assoc}), the number \code{k} of
#' associated sites in state 1 is counted, out of \code{n = ncol(snps.assoc)} sites.
#' The individual is then assigned phenotype "A" with probability \code{k/n} if \code{p = 1},
#' or \code{(1 - p^k)/(1 - p^n)} otherwise, and phenotype "B" with the complementary probability.
#' The \code{tree} argument is not currently used by this function.
#'
#' @return A factor of length \code{nrow(snps.assoc)} containing the simulated
#' phenotype ("A" or "B") of each individual.
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#' @examples
#'
#' ## simulate a phenotype from 10 associated sites in 100 individuals
#' snps.assoc <- matrix(sample(c(0,1), 1000, replace=TRUE), nrow=100)
#' phen <- fwd.phen.sim(snps.assoc, p=0.8)
#' table(phen)
#'
#' @export

########################################################################

## TO DO: ##
## Add arg continuous=FALSE --> continuous phen sim.
## Implement ASR for the snps.assoc only --> get phen for internal nodes too.


## OPTIONS: ##
## Cumulative probability (eg. if 7/10 SNPs, 70% chance of phen)--may allow for lots of noise...
## Threshold (eg. must have 7/10 SNPs to have phen)--will allow for lots of noise.
## Specific combinations (eg. Must have SNPs 1&2 OR 3&4)--will be very hard for treeWAS.
## Combination of above

fwd.phen.sim <- function(snps.assoc, p=1, tree=NULL){

  ## a single associated locus may arrive as a plain vector
  ## (eg. after snps[,i] without drop=FALSE): treat it as a one-column matrix.
  if(is.null(dim(snps.assoc))) snps.assoc <- as.matrix(snps.assoc)

  n.loci <- ncol(snps.assoc)
  if(n.loci < 1) stop("'snps.assoc' must contain at least one locus (column).")

  ## number of associated sites in state 1, for each individual (NAs are not counted):
  n.snps.assoc <- unname(rowSums(snps.assoc == 1, na.rm=TRUE))

  ####################
  ## .get.phen.prob ##
  ####################
  ## Pr(phen == "A"), given the number of associated sites in state 1.
  .get.phen.prob <- function(n.snps.assoc, p){
    if(p == 1){
      ys <- n.snps.assoc/n.loci
    }else{
      ys <- (1-p^n.snps.assoc)/(1-p^n.loci)
    }
    return(ys)
  } # end .get.phen.prob

  phen.prob <- .get.phen.prob(n.snps.assoc, p)

  ## draw one phenotype per individual.
  ## (one sample() call per individual, in order, as in the original
  ## implementation, so that seeded results are unchanged.)
  phen <- vapply(phen.prob,
                 function(pr)
                   sample(c("A", "B"),
                          size=1,
                          replace=TRUE,
                          prob=c(pr, 1-pr)),
                 character(1))
  phen <- as.factor(unname(phen))

  # ###############
  # ## HISTOGRAM ##
  # ###############
  # hist(.get.phen.prob(n.snps.assoc=n.snps.assoc, p=p),
  #      breaks=10, col="blue", xlim=c(0,1),
  #      main=paste("Histogram of Pr(phen)
  #                 \n p = ", p, sep=""))
  #
  # ################
  # ## PROB CURVE ##
  # ################
  # plot_prob_phen(p=p, n.snps.assoc=ncol(snps.assoc))


  ######################################################
  ## RECORDED RESULTS (from earlier exploratory runs) ##
  ######################################################
  ## For each value of p: table(phen), then abs(corr.dat[snps.assoc])
  ##
  ## w p = 0.6:  A = 95, B = 5
  ##   0.08 0.20 0.04 0.18 0.26 0.20 0.28 0.40 0.36 0.08
  ##
  ## w p = 0.8:  A = 75, B = 25
  ##   0.16 0.08 0.08 0.14 0.10 0.12 0.36 0.24 0.32 0.08
  ##
  ## w p = 1:    A = 49, B = 51
  ##   0.04 0.04 0.12 0.02 0.02 0.08 0.00 0.04 0.08 0.12
  ##
  ## w p = 1.2:  A = 37, B = 63
  ##   0.08 0.00 0.12 0.10 0.06 0.04 0.04 0.28 0.16 0.04


  return(phen)

} # end fwd.phen.sim


## --------------------------------------------------------------------------------
## /R/fwd.plot.prob.phen.R:
## --------------------------------------------------------------------------------

####################
## plot_prob_phen ##
####################

########################################################################

###################
## DOCUMENTATION ##
###################

#' Plot the probability of association, given \code{p} and \code{n.snps.assoc}.
#'
#' [*For use with the 'fwd.-.sim' functions:*]
#' Plot the cumulative probability of association (Pr(phen=1)), with a given value of \code{p},
#' as the number of associated sites (SNPi=1) increases from i=0 to i=\code{n.snps.assoc}.
#'
#' @param p A numeric value indicating the probability of substitution, at each site, along the tree.
#' @param n.snps.assoc An integer specifying the number of genetic loci that are associated with the phenotype.
#'
#' @return (Invisibly) the numeric vector of plotted probabilities,
#' one value for each of 0, 1, ..., \code{n.snps.assoc} sites in state 1.
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#' @examples
#' \dontrun{
#' ## basic use of fn ##
#' ## compare probability of having phenotype with 10 SNPs at varying p:
#' plot_prob_phen(p=0.8, n.snps.assoc=10)
#' plot_prob_phen(p=0.5, n.snps.assoc=10)
#' plot_prob_phen(p=0.2, n.snps.assoc=10)
#' }
#' @export

########################################################################


plot_prob_phen <- function(p=0.5, n.snps.assoc=10){

  ## number of associated sites in state 1: 0, 1, ..., n.snps.assoc
  xs <- 0:n.snps.assoc

  ## cumulative probability of association.
  ## The normalising term uses n.snps.assoc (previously hard-coded to 10), so the
  ## curve reaches 1 at xs == n.snps.assoc for any number of associated sites.
  if(p == 1){
    ys <- xs/n.snps.assoc
  }else{
    ys <- (1-p^xs)/(1-p^n.snps.assoc)
  }

  ## plot ##
  ## x-axis spans the full range of xs (previously fixed at c(0,10)).
  plot(xs,ys,xlim=c(0,n.snps.assoc),ylim=c(0,1),
       main=paste("p = ", p, sep=""),
       xlab="Number of associated sites in state 1",
       ylab="Cumulative probability of association")

  invisible(ys)

} # end plot_prob_phen




#################################
## ENABLE ALTERNATE FN NAME:   ##
#################################
# plot.prob.phen <- function(p, n.snps.assoc, ...){
#   return(plot_prob_phen(p, n.snps.assoc, ...))
# } # end plot.prob.phen





# --------------------------------------------------------------------------------
# /R/heatmap.DNAbin.R:
# --------------------------------------------------------------------------------


####################
## heatmap.DNAbin ##
####################

########################################################################

###################
## DOCUMENTATION ##
###################

#' Heatmap of pairwise genetic distances.
#'
#' Computes the pairwise genetic distances between the sequences in \code{dna}
#' (with \code{dist.dna}) and draws the distance matrix as a heatmap.
#'
#' @param dna A DNAbin object.
#' @param dist.dna.model A character string specifying the type of model to use in
#' calculating the genetic distance between individual genomes (see ?dist.dna).
#'
#' @return \code{NULL}, invisibly. Called for its side effect (a plot).
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#'
#'
#' @rawNamespace import(ape, except = zoom)
#' @export

########################################################################

heatmap.DNAbin <- function(dna, dist.dna.model="JC69"){

  # require(ape)

  if(!inherits(dna, "DNAbin")) dna <- as.DNAbin(dna)

  #############
  ## HEATMAP ##
  #############
  ## get a distance matrix between the genomes
  D <- dist.dna(dna, model = dist.dna.model)

  mat <- t(as.matrix(D))
  n <- ncol(mat)
  ## reverse the columns so the first genome is drawn at the top of the y-axis:
  mat <- mat[, n:1, drop=FALSE]

  ## Take the labels from the distance matrix itself, so that they are available
  ## whether dna is stored as a list or as a matrix (names(dna) is NULL for a matrix):
  noms <- rownames(mat)
  if(is.null(noms)) noms <- as.character(seq_len(n))

  ## widen the left/top margins for the labels;
  ## restore the caller's own margins on exit (even if plotting fails):
  op <- par(mar=c(1,5,5,1))
  on.exit(par(op), add=TRUE)

  image(x=1:n, y=1:n, mat,
        col=rev(heat.colors(100)),
        xaxt="n", yaxt="n", xlab="", ylab="")
  axis(side=2, at=c(1:n),
       labels=rev(noms), las=2, cex.axis=1)
  axis(side=3, at=c(1:n),
       labels=noms, las=1, cex.axis=1)

  invisible(NULL)

} # end heatmap.DNAbin

# --------------------------------------------------------------------------------
# /R/pair.tests.R:
# --------------------------------------------------------------------------------



################
## pair.tests ##
################

########################################################################

###################
## DOCUMENTATION ##
###################

#' Pairwise tests for categorical phenotypes
#'
#' Internal function to calculate treeWAS
#' terminal, simultaneous, subsequent tests,
#' and chi-squared p-values for a given snp across pairs of
#' phenotype levels.
#'
#' @param x A contingency table (snps[,i] x phen) for score 1 (\code{terminal.test}
#' with \code{correct.prop = TRUE}, \code{categorical = TRUE}).
#' @param y A vector of values containing pairwise score 2 (\code{simultaneous.test}
#' with \code{categorical = TRUE}) results for snps[,i]. Must be named by phenotype pair,
#' in the form "levelA : levelB" (the order of pairs, and of levels within a pair, is free).
#' @param z A contingency table (snps.rec[,i] x phen.rec) for score 3 (\code{subsequent.test}
#' with \code{correct.prop = TRUE}, \code{categorical = TRUE}).
#' @param method A character string passed to \code{p.adjust} to correct the
#' chi-squared p-values for multiple testing.
#' @param digits An integer; the number of significant digits retained in the output.
#'
#' @return A data.frame with one row per pair of phenotype levels and columns
#' \code{phen.pair}, \code{terminal}, \code{simultaneous}, \code{subsequent},
#' \code{p.chisq}, \code{p.adj.chisq}.
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#' @export
#' @examples
#' ## Example ##
#' \dontrun{
#' ## basic use of fn
#' out <- pair.tests(x, y, z)
#' }
#'
#' @importFrom stats chisq.test
#' @importFrom stats p.adjust
#'

########################################################################


pair.tests <- function (x, y, z,
                        method = "bonf", digits = 3){

  n <- nrow(x)
  if(is.null(n) || n < 2){
    stop("x must be a contingency table with at least 2 rows (phenotype levels).")
  }
  if(is.null(names(y))){
    stop("y must be named by phenotype pair (e.g., 'A : B').")
  }

  N <- n * (n - 1)/2
  phen.pair <- rep("A", N)
  p.chisq <- rep(NA, N)
  phi <- rep(NA, N)
  phi.rec <- rep(NA, N)
  k <- 0

  for (a in 1:(n - 1)) {
    for (b in (a + 1):n) {
      k <- k + 1
      ## Get phen pair:
      nom.a <- as.character(rownames(x)[a])
      nom.b <- as.character(rownames(x)[b])
      mat <- matrix(c(x[a, ], x[b, ]), nrow = 2, byrow = TRUE)
      mat.rec <- matrix(c(z[a, ], z[b, ]), nrow = 2, byrow = TRUE)
      phen.pair[k] <- paste0(nom.a, " : ", nom.b)
      ## Calculate scores 1, 3, chisq.p values:
      x2 <- suppressWarnings(chisq.test(mat, correct=FALSE))
      x2.rec <- suppressWarnings(chisq.test(mat.rec, correct=FALSE))
      p.chisq[k] <- signif(x2$p.value, digits = digits)
      phi[k] <- signif(sqrt(x2$statistic/sum(mat)), digits = digits)
      phi.rec[k] <- signif(sqrt(x2.rec$statistic/sum(mat.rec)), digits = digits)
    } # end for (b) loop
  } # end for (a) loop

  ## Reorder pairwise score 2 to follow the rows of the output:
  ## A pair is identified by its two level names, whatever their order.
  .pair.key <- function(noms){
    vapply(strsplit(noms, " : ", fixed = TRUE),
           function(e) paste(sort(e), collapse = " : "),
           character(1), USE.NAMES = FALSE)
  }
  ## ox[r] = position in y of the pair shown in output row r.
  ## (The previous code indexed y by the inverse of this permutation, which
  ##  misassigned scores whenever the reordering was not its own inverse.)
  ox <- match(.pair.key(phen.pair), .pair.key(names(y)))

  df <- data.frame(phen.pair = phen.pair, stringsAsFactors = FALSE)
  df$terminal <- phi
  df$simultaneous <- y[ox]
  df$subsequent <- phi.rec
  df$p.chisq <- p.chisq
  df$p.adj.chisq <- signif(p.adjust(df$p.chisq, method = method),
                           digits = digits)
  return(df)

} # end pair.tests


## eg. output:
# PT[[snps.sig[j]]]
#         phen.pair terminal simultaneous subsequent  p.chisq p.adj.chisq
# 1   chicken : cow    0.876            9      0.852 1.62e-12    4.86e-12
# 2 chicken : human    0.435           -2      0.497 4.53e-04    1.36e-03
# 3     cow : human    0.530            3      0.417 9.25e-06    2.77e-05

# --------------------------------------------------------------------------------
# /R/phen.sim.R:
# --------------------------------------------------------------------------------


##############
## phen.sim ##
##############

## TO DO ##
## CAREFUL--phen.sim seems not to be working with trees other than those
## produced with your coalescent.tree.sim fn (eg. rtree(100))!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


########################################################################

###################
## DOCUMENTATION ##
###################

#' Simulate a binary phenotype along a phylogenetic tree.
#'
#' Starting from state "A" at the root, phenotypic substitutions are placed on
#' branches of the tree (with probability proportional to branch length) and the
#' resulting states ("A" or "B") are returned for all terminal and internal nodes.
#'
#' @param tree A phylo object.
#' @param n.subs An integer controlling the phenotypic substitution rate (see details).
#' @param grp.min An optional numeric value < 0.5 specifying the minimum accepted proportion of terminal nodes
#' to be in the minor phenotypic group. It may be useful to specify a \code{grp.min} of,
#' for example, 0.2 (the default) to prevent excessive imbalance in the phenotypic group sizes.
#' However,
#' it is important to note that (at least for the time being) \code{grp.min} values closer to
#' 0.5 are likely to cause the computational time of \code{phen.sim} to increase substantially,
#' as the function will run until acceptable group sizes are randomly generated.
#' A tip-set containing only one phenotype is treated as having a minor group of size 0.
#' @param n.subs.var A logical. If \code{TRUE} (the default), the number of substitutions
#' is drawn from a Poisson distribution with mean \code{n.subs} (re-drawn until it exceeds 1);
#' if \code{FALSE}, exactly \code{n.subs} substitutions are simulated.
#' @param seed An optional integer used to set the seed and control the pseudo-random process used in
#' \code{phen.sim}, enabling the repeatable regeneration of identical output.
#'
#' @description The parameter n.subs controls the simulation of the phenotype by specifying
#' the expected value of the number of phenotypic substitutions to occur on the tree provided.
#' The true number of phenotypic substitutions is drawn from a Poisson distribution with parameter n.subs.
#'
#' @return A list with elements \code{phen} (factor of tip states, named by tip label),
#' \code{phen.nodes} (factor of states for all nodes), \code{phen.edges} (list giving the
#' state(s) along each edge) and \code{phen.loci} (indices of the edges carrying a substitution,
#' with respect to the tree in pruningwise order).
#' If \code{n.subs} is \code{NULL}, all four elements are \code{NULL}.
#'
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#'
#' @examples
#'
#' ## basic use of fn
#' tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
#'
#' ## plot output
#' plot(tree)
#'
#' @importFrom phangorn midpoint
#'
#' @export

########################################################################
# @useDynLib phangorn, .registration = TRUE

phen.sim <- function(tree,
                     n.subs = 15,
                     grp.min = 0.2,
                     # coaltree = TRUE,
                     n.subs.var=TRUE, # simulate approximately n.subs subs
                     seed = NULL){

  if(!is.null(seed)) set.seed(seed)

  ## HANDLE TREE: ##
  ## Always work with trees in "pruningwise" order:
  tree <- reorder.phylo(tree, order="pruningwise")
  ## Trees must be rooted:
  if(!is.rooted(tree)) tree <- midpoint(tree)

  ## No substitution rate provided: no phenotype is simulated.
  ## (This path previously stopped with an error on an undefined object.)
  if(is.null(n.subs)){
    return(list(phen = NULL, phen.nodes = NULL, phen.edges = NULL, phen.loci = NULL))
  }

  ####################
  ## CHECK ARGUMENTS ##
  ####################
  n.edges <- length(tree$edge.length)
  if(n.edges == 0){
    stop("tree$edge.length is required to place phenotypic substitutions on the tree.")
  }
  ## edges that can carry a substitution (sampling weight > 0):
  n.avail <- sum(tree$edge.length > 0, na.rm=TRUE)

  if(!is.null(grp.min)){
    if(grp.min < 0 | grp.min > 0.5){
      stop("grp.min must lie between 0 and 0.5 (the minor group cannot exceed half of the tips).")
    }
  }

  ## store the inputted desired number of phenotypic substitutions
  n.phen.subs <- n.subs

  if(n.subs.var == TRUE){
    ## the Poisson draw is repeated until it exceeds 1, so it must be able to do so:
    if(n.phen.subs <= 0){
      stop("n.subs must be > 0 when n.subs.var is TRUE.")
    }
    if(n.avail < 2){
      stop("The tree must contain at least 2 edges of positive length.")
    }
  }else{
    if(n.phen.subs > n.avail){
      stop("n.subs exceeds the number of edges of positive length in the tree.")
    }
    if(n.phen.subs < 1 & !is.null(grp.min)){
      if(grp.min > 0){
        ## with no substitutions all tips share the root state; keep returning that
        ## phenotype (as before) instead of searching forever for a second group:
        warning("grp.min cannot be satisfied with fewer than 1 substitution; ignoring grp.min.")
        grp.min <- NULL
      }
    }
  }

  ####################################
  ## PHENOTYPE simulation procedure ## ~ sim.by.locus...
  ####################################

  ## phenotype of the root individual:
  phen.root <- "A"

  ## quantities that do not change between attempts: ##

  ## number of nodes (terminal and internal):
  n.nodes <- length(unique(as.vector(tree$edge)))
  ## number of terminal nodes (the root is the lowest-numbered internal node):
  n.ind <- min(tree$edge[,1])-1

  ############################
  ## Get Anc-Des EDGE ORDER ##
  ############################
  ## Get sequence from lowest ("root", Nterm+1) to highest ancestral node:
  ix <- c(min(tree$edge[,1]):max(tree$edge[,1]))
  ## Get for loop index of rows in tree$edge[,1], in pairs, from lowest to highest:
  ## NOTE(review): this order assumes every internal node has a higher number than
  ## its ancestors -- possibly the cause of the TO DO above; confirm for trees
  ## not produced by coalescent.tree.sim.
  x <- as.vector(unlist(sapply(c(1:length(ix)), function(e) which(tree$edge[,1] == ix[e]))))

  phen.nodes <- phen.leaves <- phen.loci <- NULL

  ## Repeat until the minor phenotypic group is large enough ##
  toRepeat <- TRUE
  while(toRepeat == TRUE){

    ## draw the number of substitutions to occur:
    if(n.subs.var == TRUE){
      ## draw an approximate n.subs given input n.subs
      ## (eg to get a distribution around n.subs over multiple sims):
      n.subs <- rpois(n=1, lambda=n.phen.subs)
      ## if n.subs==0 or ==1, re-sample
      while(n.subs <= 1){
        n.subs <- rpois(n=1, lambda=n.phen.subs)
      }
      ## cannot place more substitutions than there are available edges:
      if(n.subs > n.avail) n.subs <- n.avail
    }else{
      ## or draw exactly n.subs as input:
      n.subs <- n.phen.subs
    }

    ## draw the branches to which you will assign the
    ## n.subs to occur for the phenotype (~ branch length):
    phen.loci <- sample(c(1:n.edges),
                        n.subs, replace=FALSE, prob=tree$edge.length)
    ## rearrange phen.loci
    phen.loci <- sort(phen.loci, decreasing=TRUE)

    ## set phenotype for all nodes to be phen.root:
    phen.nodes <- as.list(rep(phen.root, n.nodes))
    names(phen.nodes) <- paste("n", c(1:n.nodes), sep=".")

    ## get phen of nodes, from the root down:
    for(i in x){
      anc <- tree$edge[i, 1]
      des <- tree$edge[i, 2]
      if(i %in% phen.loci){
        ## a substitution occurs on this branch:
        phen.nodes[[des]] <- .switch.phen(phen.nodes[[anc]])
      }else{
        ## if no phen subs occur on branch i, set phen of
        ## downstream individual to be equal to ancestor's
        phen.nodes[[des]] <- phen.nodes[[anc]]
      }
    } # end for loop

    ## get phen of TERMINAL nodes (leaves)
    phen.leaves <- as.factor(as.vector(unlist(phen.nodes[c(1:n.ind)])))

    ## CHECK THAT MIN GRP.SIZE >= THRESHOLD ##
    if(!is.null(grp.min)){
      ## count BOTH states, so that an absent state counts as a group of size 0
      ## (a one-cell table previously passed this check with min(tab) == n.ind):
      tab <- table(factor(as.character(phen.leaves), levels=c("A", "B")))
      ## threshold is a proportion of the number of tips:
      grp.thresh <- length(phen.leaves)*grp.min
      toRepeat <- min(tab) < grp.thresh
    }else{
      toRepeat <- FALSE
    }

  } # end WHILE LOOP #########


  ## Assign names to phen as tree$tip.labs in original order ##
  ## If checks fail, individuals in phen.leaves will be named 1:N (not ideal)
  names(phen.leaves) <- c(1:length(phen.leaves))
  ## Check that tip.labs are not NULL:
  if(is.null(tree$tip.label)){
    warning("tree$tip.label was NULL.
            Assigning individuals names 1:N. Note that these may NOT match sequence labels!")
  }else{
    ## Check that tip.labs is of correct length:
    if(length(tree$tip.label) != length(phen.leaves)){
      warning("The length of tree$tip.label did not match
              the number of terminal node phenotypes simulated.
              Assigning individuals names 1:N. Note that these may NOT match sequence labels!")
    }else{
      ## If checks passed, assign tip.labs to be names of phen.leaves:
      names(phen.leaves) <- tree$tip.label
    }
  }


  ## get phen of branches
  phen.branch <- as.list(rep(phen.root, n.edges))
  names(phen.branch) <- paste("e", c(1:n.edges), sep=".")
  for(i in x){
    ## states at the (ancestor, descendant) ends of branch i:
    ends <- as.vector(unlist(phen.nodes[tree$edge[i,]]))
    if(ends[1] == ends[2]){
      ## Branches with ONE phenotype get labelled by that phenotype:
      phen.branch[[i]] <- ends[1]
    }else{
      ## Branches with TWO phenotypes get labelled as such, in ORDER:
      phen.branch[[i]] <- ends
    }
  } # end for loop


  ## convert phen.nodes to factor
  phen.nodes <- as.factor(as.vector(unlist(phen.nodes)))


  ## Assign names to phen.nodes ##
  ## ... as c(tree$tip.labs, tree$node.labs) in original order:
  ## If checks fail, individuals will be named 1:N, node.(N+1):node.(N+Ninternal)

  ## Assign terminal nodes names same as phen.leaves (ideally = tree$tip.label)
  noms.term <- names(phen.leaves)

  ## Assign internal nodes either tree$node.label or node.<index>:
  int.inds <- c((length(phen.leaves)+1):length(phen.nodes))
  noms.int <- paste("node", int.inds, sep=".")
  ## Use node.labs only if present and of correct length:
  if(!is.null(tree$node.label)){
    if(length(tree$node.label) == length(int.inds)){
      noms.int <- tree$node.label
    }
  }

  ## Assign these names to phen.nodes:
  names(phen.nodes) <- c(noms.term, noms.int)


  ## make output list
  phen.list <- list(phen.leaves, phen.nodes, phen.branch, phen.loci)
  names(phen.list) <- c("phen", "phen.nodes", "phen.edges", "phen.loci")

  return(phen.list)
} # end phen.sim

# --------------------------------------------------------------------------------
# /R/simultaneous.test.R:
# --------------------------------------------------------------------------------




#######################
## simultaneous.test ## ## SCORE 2 ##
#######################

########################################################################

###################
## DOCUMENTATION ##
###################

#' Simultaneous test
#'
#' Calculates treeWAS score 2, the simultaneous test, as the number of
#' substitutions or changes in genotype (\code{snps.reconstruction}) and phenotype
#' (\code{phen.reconstruction}) that occur simultaneously on the same branches of the tree.
#'
#' @param snps.reconstruction A matrix containing the terminal and reconstructed
#' ancestral states of SNPs for all nodes in the tree.
#' @param phen.reconstruction A vector containing the terminal and reconstructed
#' ancestral states of the phenotype for all nodes in the tree.
#' @param tree A phylo object containing the tree representing the ancestral relationships
#' between the individuals for which snps and phen are known.
#' @param categorical A logical. If \code{FALSE} (the default), the phenotype is rescaled to
#' the range 0-1 and the signed score is returned. If \code{TRUE}, the score is computed for
#' every pair of phenotype levels and the absolute values are summed over pairs.
#'
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#'
#'
#' @importFrom scales rescale
#' @importFrom Hmisc all.is.numeric
#' @importFrom utils combn
#'
#' @export

########################################################################
# @useDynLib phangorn, .registration = TRUE
# @importFrom phangorn midpoint

simultaneous.test <- function(snps.reconstruction,
                              phen.reconstruction,
                              tree,
                              categorical = FALSE){

  snps.rec <- snps.reconstruction
  phen.input <- phen.reconstruction
  rm(snps.reconstruction)
  rm(phen.reconstruction)

  ## Edges (ancestor, descendant) of the tree in pruningwise order:
  ## (Trees must be rooted.)
  # if(!is.rooted(tree)) tree <- midpoint(tree) # require(phangorn)
  edges <- reorder.phylo(tree, order="pruningwise")$edge

  ####################################################################
  #####################
  ## Handle phen.rec ##
  #####################
  ## The association test needs a numeric phenotype.
  n.na.input <- sum(is.na(phen.input))

  ## NB: can only be binary or continuous at this point...
  uniq.vals <- unique(as.vector(unlist(phen.input)))
  n.levs <- sum(!is.na(uniq.vals))

  phen.rec <- phen.input
  if(!is.numeric(phen.rec)){
    if(all.is.numeric(phen.rec)){
      ## numbers stored as character/factor:
      phen.rec <- as.numeric(as.character(phen.rec))
    }else{
      ## labels: use the integer codes of the levels
      phen.rec <- as.numeric(as.factor(phen.rec))
      if(n.levs > 2 && categorical != TRUE){
        warning("phen.rec has more than 2 levels but is not numeric.
                Setting 'categorical' to TRUE.")
        categorical <- TRUE
      }
    }
  }
  ## ensure ind names not lost
  names(phen.rec) <- names(phen.input)

  ## The conversion must not have introduced missing values:
  if(sum(is.na(phen.rec)) > n.na.input){
    stop("NAs created while converting phen.rec to numeric.")
  }
  ####################################################################

  ###############################
  ## GET DIFFS ACROSS BRANCHES ##
  ###############################

  ## Change in each SNP along each branch (ancestor - descendant):
  snps.diffs <- snps.rec[edges[,1], ] - snps.rec[edges[,2], ]

  if(categorical == FALSE){
    ## ORIGINAL SCORE 2:
    ## phen.rec (both Pa and Pd should be on same scale):
    phen.rec <- rescale(phen.rec, to=c(0,1)) # require(scales)

    ## Change in phenotype along each branch:
    phen.diffs <- phen.rec[edges[,1]] - phen.rec[edges[,2]]

    ## Sum of simultaneous changes, returned with sign:
    score2 <- colSums(snps.diffs * phen.diffs, na.rm=TRUE)

  }else{
    ## CATEGORICAL SCORE 2:
    ## one signed score per pair of phenotype levels ...
    pairs <- t(combn(unique(phen.rec[!is.na(phen.rec)]), m=2))
    by.pair <- lapply(1:nrow(pairs), function(p){
      ## keep only the two levels of this pair, recoded as 0/1:
      pr <- phen.rec
      pr[which(!pr %in% pairs[p,])] <- NA
      pr <- as.numeric(as.factor(as.character(pr)))-1
      phen.diffs <- pr[edges[,1]] - pr[edges[,2]]
      colSums(snps.diffs * phen.diffs, na.rm=TRUE)
    })

    ## ... then summed over pairs in absolute value:
    s2 <- do.call(rbind, by.pair)
    score2 <- colSums(abs(s2), na.rm=TRUE)
  }

  names(score2) <- colnames(snps.rec)

  return(score2)

} # end simultaneous.test




# --------------------------------------------------------------------------------
# /R/simultaneous.test.epi.R:
# --------------------------------------------------------------------------------




#######################
## simultaneous.test ##
#######################

########################################################################

###################
## DOCUMENTATION ##
###################

#' Test for association between genetic loci with Score 2.
#'
#' [*\emph{A work in progress; not currently integrated into treeWAS:}*]
#' Use the simultaneous.test (Score 2) to test for associations between genetic loci,
#' which may indicate an epistatic interaction.
#' This function can be used either to test
#' for pairwise association between all pairs of genetic loci
#' or for associations between a subset of snps and all other snps
#' (recommended for large datasets; see details).
#'
#' @param snps.reconstruction A matrix containing the terminal and reconstructed
#' ancestral states of SNPs for all nodes in the tree.
#' @param tree A phylo object containing the tree representing the ancestral relationships
#' between the individuals for which snps and phen are known.
#' @param snps.subset An optional vector (see details); else, NULL.
#' The snps.subset vector can be a character vector, containing a subset of colnames(snps.rec),
#' a logical vector, using TRUE or FALSE to indicate which columns are to be retained and excluded,
#' or an integer vector, specifying the column indices to be retained.
#'
#'
#' @details The number of pairwise tests between all pairs of snps
#' grows rapidly as the number of snps columns increases.
#' As such, for datasets where ncol(snps.reconstruction) is large, we recommend that
#' the snps.subset argument is used to reduce the number of tests, by
#' indicating which snps to test for association with all other snps.
#' The snps.subset index can be used to select any subset of snps of interest.
#' For example, one may wish to test for interactions between all snps and a subset of snps that
#' had been deemed significantly associated with a particular phenotype in a previous run of treeWAS.
#'
#' @return A named numeric vector of (signed) scores, one per tested pair of snps, with names
#' of the form "snp1/snp2". The attributes \code{snps1} and \code{snps2} give the
#' two column names of each pair separately.
#'
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#'
#'
#' @importFrom scales rescale
#' @importFrom Hmisc all.is.numeric
#'
#' @export

########################################################################
# @useDynLib phangorn, .registration = TRUE
# @importFrom phangorn midpoint

simultaneous.test.epi <- function(snps.reconstruction, # can be snps.REC OR snps.sim.REC matrix ## NOTE: subs.edges no longer required for any version of this test.
                                  tree,
                                  snps.subset=NULL){

  snps.rec <- snps.reconstruction
  rm(snps.reconstruction)


  ## Always work with tree in pruningwise order:
  tree <- reorder.phylo(tree, order="pruningwise")
  ## Trees must be rooted:
  # if(!is.rooted(tree)) tree <- midpoint(tree) # require(phangorn)
  ## Get tree edges:
  edges <- tree$edge


  ################################################
  ## GET SUBSET of SNPS (logical/names/indices) ##
  ################################################
  toKeep <- NULL
  if(!is.null(snps.subset)){
    if(!is.vector(snps.subset)){
      stop("snps.subset must be a vector (either a logical or numerical index vector,
           or a vector of snps.rec column names, indicating which columns are to be kept as a subset")
    }
    if(is.logical(snps.subset)){
      ## LOGICAL (snps.subset = T/F toKeep) ##
      toKeep <- which(snps.subset == TRUE)
    }else if(is.numeric(snps.subset)){
      ## NUMERIC (snps.subset = indices toKeep) ##
      if(!all(snps.subset %in% c(1:ncol(snps.rec)))){
        stop("not all snps.subset correspond to indices in 1:ncol(snps.rec)")
      }
      toKeep <- snps.subset
    }else{
      ## CHARACTER (snps.subset = colnames toKeep) ##
      if(!all(snps.subset %in% colnames(snps.rec))){
        stop("not all snps.subset are in colnames(snps.rec)")
      }
      toKeep <- which(colnames(snps.rec) %in% snps.subset)
    }
    if(length(toKeep) == 0){
      stop("snps.subset does not select any columns of snps.rec")
    }
  }

  ####################################################################

  ###############################
  ## GET DIFFS ACROSS BRANCHES ##
  ###############################

  ## Get SNPs diffs (ancestor - descendant), kept as a matrix even for one column: ##
  snps.diffs <- snps.rec[edges[,1], , drop=FALSE] - snps.rec[edges[,2], , drop=FALSE]

  ## If no snps.subset, run test over all columns...
  if(is.null(toKeep)) toKeep <- seq_len(ncol(snps.diffs))

  ## Column labels (fall back on column indices if snps.rec has no colnames):
  noms <- colnames(snps.rec)
  if(is.null(noms)) noms <- as.character(seq_len(ncol(snps.rec)))

  ## Get snp1:snp2 scores: ##
  ## (only the column sums are stored; the per-pair product matrices are not retained)
  SCORE2 <- vector("list", length(toKeep))
  for(i in seq_along(toKeep)){
    ## Return with sign:
    SCORE2[[i]] <- colSums(snps.diffs[,toKeep[i]] * snps.diffs, na.rm=TRUE)
    names(SCORE2[[i]]) <- paste(noms[toKeep[i]], noms, sep="/")
  } # end for loop

  #######################
  score2 <- unlist(SCORE2)
  #######################
  ## Record both members of each pair directly from the column labels
  ## (rather than re-splitting the "/"-joined names, which broke on labels containing "/"):
  attr(score2, "snps1") <- rep(noms[toKeep], each=length(noms))
  attr(score2, "snps2") <- rep(noms, times=length(toKeep))
  # names(score2) ## still there, just not visible w str(score2)
  #######################

  return(score2)

} # end simultaneous.test.epi
####################################################################################
####################################################################################






#






# --------------------------------------------------------------------------------
# /R/subsequent.test.R:
# --------------------------------------------------------------------------------

#####################
## subsequent.test ## ## SCORE 3 ##
#####################

########################################################################

###################
## DOCUMENTATION ##
###################

#'
Subsequent test 13 | #' 14 | #' Calculates treeWAS score 3, the subsequent test. 15 | #' 16 | #' @param tree A phylo object. 17 | #' 18 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com} 19 | #' 20 | #' @examples 21 | #' 22 | #' ## basic use of fn 23 | #' tree <- coalescent.tree.sim(n.ind = 100, seed = 1) 24 | #' 25 | #' @importFrom scales rescale 26 | #' @importFrom Hmisc all.is.numeric 27 | #' @export 28 | 29 | ######################################################################## 30 | # @useDynLib phangorn, .registration = TRUE 31 | # @importFrom phangorn midpoint 32 | 33 | subsequent.test <- function(snps.reconstruction, 34 | phen.reconstruction, 35 | tree, 36 | correct.prop = FALSE, 37 | categorical = FALSE){ 38 | 39 | snps.rec <- snps.reconstruction 40 | phen.rec <- phen.reconstruction 41 | rm(snps.reconstruction) 42 | rm(phen.reconstruction) 43 | 44 | ## Always work with tree in pruningwise order: 45 | tree <- reorder.phylo(tree, order="pruningwise") 46 | ## Trees must be rooted: 47 | # if(!is.rooted(tree)) tree <- midpoint(tree) # require(phangorn) 48 | ## get tree edges: 49 | edges <- tree$edge 50 | 51 | #################################################################### 52 | ##################### 53 | ## Handle phen.rec ## 54 | ##################### 55 | ## convert phenotype to numeric: 56 | phen.rec.ori <- phen.rec 57 | ## Convert to numeric (required for assoc tests): 58 | na.before <- length(which(is.na(phen.rec))) 59 | 60 | ## NB: can only be binary or continuous at this point... 61 | levs <- unique(as.vector(unlist(phen.rec))) 62 | n.levs <- length(levs[!is.na(levs)]) 63 | if(!is.numeric(phen.rec)){ 64 | if(all.is.numeric(phen.rec)){ 65 | phen.rec <- as.numeric(as.character(phen.rec)) 66 | }else{ 67 | phen.rec <- as.numeric(as.factor(phen.rec)) 68 | if(n.levs > 2){ 69 | if(categorical != TRUE){ 70 | warning("phen.rec has more than 2 levels but is not numeric. 
71 | Setting 'categorical' to TRUE.") 72 | categorical <- TRUE 73 | } 74 | } 75 | } 76 | } 77 | ## ensure ind names not lost 78 | names(phen.rec) <- names(phen.rec.ori) 79 | 80 | ## Check that no errors occurred in conversion: 81 | na.after <- length(which(is.na(phen.rec))) 82 | if(na.after > na.before){ 83 | stop("NAs created while converting phen.rec to numeric.") 84 | } 85 | #################################################################### 86 | 87 | ################################################ 88 | ## RE-SCALE NON-BINARY VALUES (phen only ...) ## 89 | ################################################ 90 | ## phen.rec (both Pa and Pd should be on same scale): 91 | if(categorical == FALSE){ 92 | phen.rec <- rescale(phen.rec, to=c(0,1)) # require(scales) 93 | } 94 | 95 | ############################### 96 | ## GET SCORE ACROSS BRANCHES ## 97 | ############################### 98 | 99 | ## Get snps, phen values for all internal+terminal nodes: 100 | Sx <- snps.rec 101 | Px <- phen.rec 102 | 103 | ################################################################# ##### 104 | ############### 105 | ## SCORE 3.0 ## 106 | ############### 107 | if(categorical == FALSE){ 108 | if(correct.prop == FALSE){ 109 | ## Get snps, phen values for ancestral & descendant nodes: 110 | Pa <- phen.rec[edges[,1]] 111 | Pd <- phen.rec[edges[,2]] 112 | Sa <- snps.rec[edges[,1], ] 113 | Sd <- snps.rec[edges[,2], ] 114 | bl <- tree$edge.length 115 | 116 | ## ORIGINAL INTEGRAL-BASED SCORE3 (without edge length): 117 | score3 <- get.score3(Pa = Pa, Pd = Pd, Sa = Sa, Sd = Sd, l = NULL) 118 | 119 | ## Return with sign: 120 | score3 <- colSums(score3, na.rm=TRUE) 121 | }else{ 122 | ## MARGINAL-CORRECTED SCORE 1 (Phi): 123 | score3 <- ((colSums((1 - Px)*(1 - Sx), na.rm=TRUE)*colSums(Px*Sx, na.rm=TRUE)) - 124 | (colSums((1 - Px)*Sx, na.rm=TRUE)*colSums(Px*(1 - Sx), na.rm=TRUE))) / 125 | (sqrt(colSums(1 - Sx, na.rm=TRUE)*colSums(Sx, na.rm=TRUE)*sum((1 - Px), na.rm=TRUE)*sum(Px, na.rm=TRUE))) 126 
| } 127 | }else{ 128 | ## CATEGORICAL SCORE 3 (Phi): 129 | score3 <- suppressWarnings(sqrt(sapply(c(1:ncol(Sx)), function(e) 130 | chisq.test(x=Px, y=Sx[,e], correct=F)$statistic)/length(Px))) 131 | } 132 | 133 | # score3 <- abs(score3) 134 | names(score3) <- colnames(snps.rec) 135 | 136 | return(score3) 137 | 138 | } # end subsequent.test 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | ################ 148 | ## get.score3 ## 149 | ################ 150 | 151 | ######################################################################## 152 | 153 | ################### 154 | ## DOCUMENTATION ## 155 | ################### 156 | 157 | #' Calculate the per-branch terms of Score 3. 158 | #' 159 | #' Computes the integral-based Score 3 term for each branch from the phenotype and SNP states at its ancestral and descendant nodes, optionally weighting each branch by its length. 160 | #' 161 | #' @param Pa A numeric value containing either the state, 162 | #' or the probability of the state, of the phenotype at a given \emph{ancestral} node. 163 | #' @param Pd A numeric value containing either the state, 164 | #' or the probability of the state, of the phenotype at a given \emph{descendant} node. 165 | #' @param Sa A numeric value containing either the state, 166 | #' or the probability of the state, of SNPi at a given \emph{ancestral} node. 167 | #' @param Sd A numeric value containing either the state, 168 | #' or the probability of the state, of SNPi at a given \emph{descendant} node. 169 | #' @param l A numeric value specifying the length of the branch in the phylogenetic tree 170 | #' that joins the ancestral and descendant node. 
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#' @export
#' @examples
#' ## Example ##
#' \dontrun{
#' ## basic use of fn
#' tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
#' }

########################################################################

get.score3 <- function(Pa, Pd, Sa, Sd, l=NULL){

  ## Un-normalised integral-based term, one value per branch (and per SNP
  ## when Sa/Sd are matrices with branches in rows).
  ## For binary inputs it evaluates to +1 when phenotype and SNP agree at
  ## both ends of the branch and to -1 when they disagree at both ends.
  branch.term <- ((4/3)*Pa*Sa) +
    ((2/3)*Pa*Sd) +
    ((2/3)*Pd*Sa) +
    ((4/3)*Pd*Sd) -
    Pa -
    Pd -
    Sa -
    Sd +
    1

  ## No edge lengths supplied: every branch gets the same weight,
  ## 1/(number of ancestral phenotype values).
  if(is.null(l)){
    return(branch.term/length(Pa))
  }

  ## Edge lengths supplied: weight each branch by its share of the
  ## total length.
  return((l*branch.term)/sum(l))

} # end get.score3



###########################################################################################

###########################################################################################


## ---- /R/terminal.test.R ----


###################
## terminal.test ## ## SCORE 1 ##
###################

########################################################################

###################
## DOCUMENTATION ##
###################

#' Terminal test
#'
#' Calculates treeWAS score 1, the terminal test.
#'
#' @param tree A phylo object.
#'
#' @param snps A matrix containing the states of SNPs (in columns)
#' for all individuals (in rows).
#' @param phen A factor or vector containing the phenotype of the same individuals,
#' in the same order as the rows of \code{snps}.
#' @param correct.prop A logical; if \code{TRUE}, the marginal-corrected (phi) form of
#' Score 1 is returned instead of the original Score 1. By default, \code{FALSE}.
#' @param categorical A logical; if \code{TRUE}, the score for each SNP is
#' \code{sqrt(chi-squared/n)} from an uncorrected chi-squared test. It is set to \code{TRUE}
#' (with a warning) when \code{phen} is non-numeric and has more than two levels.
#'
#' @return A numeric vector containing one score per column of \code{snps},
#' named by \code{colnames(snps)}.
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#' @export
#' @examples
#' ## Example ##
#' \dontrun{
#' ## basic use of fn
#' out <- terminal.test(snps, phen)
#' }
#'
#' @importFrom scales rescale
#' @importFrom Hmisc all.is.numeric
#'

########################################################################


terminal.test <- function(snps,
                          phen,
                          correct.prop = FALSE,
                          categorical = FALSE){

  ####################################################################
  ############
  ## CHECKS ##
  ############
  ## The score multiplies phen element-wise against each column of snps,
  ## so a length mismatch would be silently recycled into wrong scores.
  if(!is.null(nrow(snps)) && length(phen) != nrow(snps)){
    stop("phen must contain exactly one value per row of snps.")
  }

  #################
  ## Handle phen ##
  #################
  ## Convert phenotype to numeric (required for assoc tests):
  phen.ori <- phen
  na.before <- length(which(is.na(phen)))

  ## NB: can only be binary or continuous at this point...
  levs <- unique(as.vector(unlist(phen)))
  n.levs <- length(levs[!is.na(levs)])
  if(!is.numeric(phen)){
    if(all.is.numeric(phen)){
      phen <- as.numeric(as.character(phen))
    }else{
      phen <- as.numeric(as.factor(phen))
      if(n.levs > 2){
        if(categorical != TRUE){
          warning("phen has more than 2 levels but is not numeric.
                  Setting 'categorical' to TRUE.")
          categorical <- TRUE
        }
      }
    }
  }
  ## ensure ind names not lost
  names(phen) <- names(phen.ori)

  ## Check that no errors occurred in conversion:
  na.after <- length(which(is.na(phen)))
  if(na.after > na.before){
    stop("NAs created while converting phen to numeric.")
  }
  ####################################################################

  ##################################
  ## GET SCORE 1 @ TERMINAL NODES ##
  ##################################
  Pd <- phen
  Sd <- snps

  ################################################
  ## RE-SCALE NON-BINARY VALUES (phen only (?)) ##
  ################################################
  if(categorical == FALSE){
    Pd <- rescale(Pd, to=c(0,1)) ## require(scales)
  }

  #############
  ## SCORE 1 ##
  #############
  if(categorical == FALSE){
    if(correct.prop == FALSE){
      ## ORIGINAL TERMINAL SCORE 1:
      ## +1 where phenotype and SNP agree, -1 where they disagree
      ## (for binary values), averaged over individuals.
      score1 <- (Pd*Sd - (1 - Pd)*Sd - Pd*(1 - Sd) + (1 - Pd)*(1 - Sd))

      ## Return with sign:
      score1 <- colSums(score1, na.rm=TRUE)/length(Pd)
    }else{
      ## MARGINAL-CORRECTED SCORE 1 (Phi):
      score1 <- ((colSums((1 - Pd)*(1 - Sd), na.rm=TRUE)*colSums(Pd*Sd, na.rm=TRUE)) -
                   (colSums((1 - Pd)*Sd, na.rm=TRUE)*colSums(Pd*(1 - Sd), na.rm=TRUE))) /
        (sqrt(colSums(1 - Sd, na.rm=TRUE)*colSums(Sd, na.rm=TRUE)*sum((1 - Pd), na.rm=TRUE)*sum(Pd, na.rm=TRUE)))
    }
  }else{
    ## CATEGORICAL SCORE 1 (Phi):
    ## seq_len/vapply keep this well-defined (numeric(0)) when snps has no columns.
    chisq <- suppressWarnings(vapply(seq_len(ncol(Sd)), function(e)
      chisq.test(x=Pd, y=Sd[,e], correct=FALSE)$statistic[[1]],
      numeric(1)))
    score1 <- sqrt(chisq/length(Pd))
  }

  # score1 <- abs(score1)
  names(score1) <- colnames(snps)

  return(score1)

} # end terminal.test
terminal.test 121 | -------------------------------------------------------------------------------- /R/terminal.test.epi.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ################### 5 | ## terminal.test ## 6 | ################### 7 | 8 | 9 | ######################################################################## 10 | 11 | ################### 12 | ## DOCUMENTATION ## 13 | ################### 14 | 15 | #' Test for epistasis between genetic loci with Score 1. 16 | #' 17 | #' [*\emph{A work in progress; not currently integrated into treeWAS:}*] 18 | #' Use the terminal.test (Score 1) to test for associations between genetic loci, 19 | #' which may indicate an epistatic interaction. 20 | #' This function can be used either to test 21 | #' for pairwise association between all pairs of genetic loci 22 | #' or for associations between a subset of snps and all other snps 23 | #' (recommended for large datasets; see details). 24 | #' 25 | #' @param snps A matrix containing the states of SNPs (in columns) for all individuals (in rows). 26 | #' @param snps.subset An optional vector (see details); else, NULL. 27 | #' The snps.subset vector can be a character vector, containing a subset of colnames(snps), 28 | #' a logical vector, using TRUE or FALSE to indicate which columns are to be retained and excluded, 29 | #' or an integer vector, specifying the column indices to be retained. 30 | #' 31 | #' 32 | #' @details The number of pairwise tests between all pairs of snps 33 | #' grows rapidly as the number of snps columns increases. 34 | #' As such, for datasets where ncol(snps) is large, we recommend that 35 | #' the snps.subset argument is used to reduce the number of tests, by 36 | #' indicating which snps to test for association with all other snps. 37 | #' The snps.subset index can be used to select any subset of snps of interest. 
#' For example, one may wish to test for interactions between all snps and a subset of snps that
#' had been deemed significantly associated with a particular phenotype in a previous run of treeWAS.
#'
#' @return A named numeric vector of Score 1 values, one per (subset snp, snp) pair,
#' with names of the form \code{"snp1/snp2"}. The attributes \code{"snps1"} and \code{"snps2"}
#' contain the two snp names of each pair.
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#'
#'
#' @importFrom scales rescale
#' @importFrom Hmisc all.is.numeric
#'
#' @export

########################################################################


terminal.test.epi <- function(snps,
                              snps.subset=NULL){

  ## Column labels used to name each pair; fall back to column indices
  ## when snps has no column names.
  snp.names <- colnames(snps)
  if(is.null(snp.names)) snp.names <- as.character(seq_len(ncol(snps)))

  ################################################
  ## GET SUBSET of SNPS (logical/names/indices) ##
  ################################################
  ## toKeep always ends up as a vector of column indices.
  if(is.null(snps.subset)){
    ## If no snps.subset, run test over all columns...
    toKeep <- seq_len(ncol(snps))
  }else if(is.logical(snps.subset)){
    if(length(snps.subset) != ncol(snps)){
      stop("A logical snps.subset must be of length ncol(snps).")
    }
    toKeep <- which(snps.subset)
  }else if(is.numeric(snps.subset)){
    toKeep <- as.integer(snps.subset)
    if(anyNA(toKeep) || any(toKeep < 1 | toKeep > ncol(snps))){
      stop("A numeric snps.subset must contain column indices between 1 and ncol(snps).")
    }
  }else{
    ## Names: keep the matching columns, in column order.
    snps.subset <- as.character(snps.subset)
    if(!all(snps.subset %in% snp.names)){
      warning("Some elements of snps.subset were not found in colnames(snps) and will be ignored.")
    }
    toKeep <- which(snp.names %in% snps.subset)
  }
  if(length(toKeep) == 0){
    stop("snps.subset does not select any columns of snps.")
  }

  #############
  ## SCORE 1 ##
  #############
  ## Each retained snp plays the role of the "phenotype" (Pd) and is
  ## scored against every column of snps (Sd) with the Score 1 equation.
  Sd <- snps
  SCORE1 <- lapply(toKeep, function(k){
    Pd <- snps[,k]
    s1s2 <- (Pd*Sd - (1 - Pd)*Sd - Pd*(1 - Sd) + (1 - Pd)*(1 - Sd))
    ## Return with sign:
    out <- colSums(s1s2, na.rm=TRUE)/length(Pd)
    names(out) <- paste(snp.names[k], snp.names, sep="/")
    out
  })

  score1 <- unlist(SCORE1)

  ## Record the two members of each pair directly from the indices,
  ## rather than by splitting the names on "/" (which is ambiguous when
  ## a snp name itself contains "/").
  attr(score1, "snps1") <- rep(snp.names[toKeep], each=ncol(snps))
  attr(score1, "snps2") <- rep(snp.names, times=length(toKeep))

  return(score1)

} # end terminal.test.epi


## ---- /R/tree.reconstruct.R ----

######################
## tree.reconstruct ##
######################


########################################################################

###################
## DOCUMENTATION ##
###################

#' Short one-phrase description.
#'
#' Longer proper discription of function...
#'
#' @param dna A matrix or DNAbin object containing genomes for (only)
#' the terminal nodes of the tree to be reconstructed.
19 | #' Individuals should be in the rows and loci in the columns; rows and columns should be labelled. 20 | #' @param method A character string specifying the method of phylogenetic reconstruction: 21 | #' one of \code{"NJ"}, \code{"BIONJ"} (the default), or \code{"parsimony"}; 22 | #' or, if NAs are present in the distance matrix, one of: \code{"NJ*"} or \code{"BIONJ*"}. 23 | #' @param dist.dna.model A character string specifying the type of model to use in 24 | #' calculating the genetic distance between individual genomes (see ?dist.dna). 25 | #' @param plot A logical specifying whether to plot the reconstructed phylogenetic tree. 26 | #' 27 | #' 28 | #' 29 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com} 30 | #' 31 | #' 32 | #' 33 | #' @rawNamespace import(ape, except = zoom) 34 | #' @importFrom phangorn as.phyDat 35 | #' @importFrom phangorn midpoint 36 | #' @importFrom phangorn pratchet 37 | #' @importFrom phangorn acctran 38 | #' 39 | #' 40 | #' @export 41 | 42 | ######################################################################## 43 | # @import phangorn 44 | # @useDynLib phangorn as.phyDat midpoint optim.pml pml, .registration = TRUE 45 | # @useDynLib phangorn, .registration = TRUE 46 | # @importFrom phangorn optim.pml 47 | # @importFrom phangorn pml 48 | # @importFrom phangorn optim.parsimony 49 | 50 | ############ 51 | ## TO DO: ## 52 | ############ 53 | ## add all the options from hclust (stats) as available methods..? 54 | ## change all methods to either upper or lower case (or add to lower check). 


tree.reconstruct <- function(dna,
                             method = c("BIONJ", "NJ", "parsimony", "BIONJ*", "NJ*"),
                             dist.dna.model = "JC69",
                             plot = TRUE){

  ## Reconstruct a rooted phylogenetic tree (in pruningwise order) from the
  ## genomes in `dna`, using the functions of ape (NJ/BIONJ and their
  ## NA-tolerant variants) or phangorn (parsimony).

  ############
  ## CHECKS ##
  ############
  ## DNA ##
  if(!inherits(dna, "DNAbin")){
    if(is.matrix(dna)){
      sp <- matrix(as.character(dna), nrow=nrow(dna), ncol=ncol(dna))
      rownames(sp) <- rownames(dna)
      colnames(sp) <- colnames(dna)

      ## Check/convert levels (NA is missing data, not a level):
      levs <- unique(as.vector(sp))
      levs <- levs[!is.na(levs)]
      nts <- c("a", "c", "g", "t")
      if(length(levs) > 4){
        stop("There must be no more than 4 unique values in dna, excluding NAs.")
      }
      if(!all(levs %in% nts)){
        ## Map the i-th observed level to the i-th nucleotide in ONE step,
        ## looking each cell up against its original value. (Replacing
        ## levels one after another can re-replace cells that were
        ## already converted.)
        sp[] <- nts[match(sp, levs)]
      } # end levs conversion

      dna <- as.DNAbin(sp)
      rownames(dna) <- rownames(sp)
      colnames(dna) <- colnames(sp)
    }else{
      stop("dna should be of class DNAbin or matrix")
    }
  }

  ## TREE REC METHOD ##
  ## The default is the full vector of choices: use its first element
  ## (BIONJ), and likewise the first element of any longer vector.
  method <- if(length(method) > 0) tolower(as.character(method[1])) else "bionj"
  if(identical(method, "njs")) method <- "nj*"
  if(identical(method, "bionjs")) method <- "bionj*"
  ## match.arg stops with an informative error if method is not recognised.
  method <- match.arg(arg = method,
                      choices = c("bionj", "nj", "parsimony", "nj*", "bionj*"),
                      several.ok = FALSE)

  ## NB: UPGMA is not offered because it enforces ultrametricity, which can
  ## bias treeWAS results; ML was discontinued as too slow.

  tree <- NULL

  ##########################
  ## Get distance matrix: ##
  ##########################
  D <- dist.dna(dna, model = dist.dna.model)

  ## Handle MISSING data:
  ## NB: NAs are ok in dna, but NAs in D arise when dist.dna cannot find a
  ## dist btw. any 2 individuals, e.g., there is an NA at all loci in at
  ## least one of the 2 inds.
  ## --> Use phylo methods that can handle NAs in D (NJ* and BIONJ*, from ape).
  ## (Any method other than "nj", including "parsimony", becomes "bionj*".)
  if(any(is.na(D))){
    if(!method %in% c("nj*", "bionj*")){
      if(method == "nj"){
        method <- "nj*"
      }else{
        method <- "bionj*"
      }
      cat("NAs in distance matrix. Replacing method of phylo estimation with ", method, ".", sep="")
    }
  }

  #################
  ## BUILD TREE: ##
  #################
  if(method == "parsimony"){
    ## (a bit slow if many unique columns)
    ## as.phyDat warns if NAs present (& doesn't include these...)
    dna4 <- suppressWarnings(as.phyDat(dna))
    ## get pars tree:
    tre.ini <- pratchet(dna4, trace=0)
    ## add edge lengths w ACCTRAN (edge lengths in n.subs):
    tree <- acctran(tre.ini, dna4)
    ## Always work with tree in pruningwise order:
    tree <- reorder.phylo(tree, order="pruningwise")
    ## Convert edge.lengths from parsimony cost to n.subs-per-site
    ## (s.t. parsimony lengths ~ lengths via NJ):
    tree$edge.length <- tree$edge.length/ncol(dna)
  }else{
    tree <- switch(method,
                   "nj" = nj(D),
                   "bionj" = bionj(D),
                   "nj*" = njs(D),
                   "bionj*" = bionjs(D))
    ## Always work with tree in pruningwise order:
    tree <- reorder.phylo(tree, order="pruningwise")
  }

  ## Trees must be rooted:
  if(!is.rooted(tree)) tree <- midpoint(tree)

  ###########
  ## PLOT: ##
  ###########
  if(plot==TRUE){
    main <- switch(method,
                   "nj" = "Neighbour-joining tree",
                   "bionj" = "BIONJ tree",
                   "parsimony" = "Parsimony tree",
                   "nj*" = "Neighbour-joining* tree",
                   "bionj*" = "BIONJ* tree")
    if(method %in% c("nj*", "bionj*")){
      plot(tree, edge.width=2)
    }else{
      plot(tree, edge.width=2, cex=0.5)
    }
    title(main)
    axisPhylo()

    ## Only touch graphics settings when something was drawn
    ## (calling par() with no device open would open one).
    par(ask=FALSE)
  }

  return(tree)
} # end tree.reconstruct
30 | 31 | 32 | The most up-to-date version of *treeWAS* can be easily installed directly within R, using the `devtools` package: 33 | 34 | 35 | ```{r, eval=FALSE, highlight=TRUE} 36 | ## install devtools, if necessary: 37 | install.packages("devtools", dep=TRUE) 38 | library(devtools) 39 | 40 | ## install treeWAS from github: 41 | install_github("caitiecollins/treeWAS", build_vignettes = TRUE) 42 | library(treeWAS) 43 | ``` 44 | 45 | 46 | *** 47 | 48 | 49 | ## Documentation 50 | 51 | 52 | Documentation on how to use *treeWAS* can be found on GitHub in [the Wiki](https://github.com/caitiecollins/treeWAS/wiki). 53 | 54 | 55 | The Wiki contains sections on [The Method](https://github.com/caitiecollins/treeWAS/wiki/1.-How-treeWAS-Works) behind *treeWAS*, 56 | the [Data & Data Cleaning](https://github.com/caitiecollins/treeWAS/wiki/2.-Data-&-Data-Cleaning) required, 57 | the [treeWAS Function & Arguments](https://github.com/caitiecollins/treeWAS/wiki/3.-treeWAS-Function-&-Arguments), 58 | a guide to [Interpreting Output](https://github.com/caitiecollins/treeWAS/wiki/4.-Interpreting-Output) returned by *treeWAS*, 59 | functions to facilitate [Integration with ClonalFrameML](https://github.com/caitiecollins/treeWAS/wiki/5.-ClonalFrameML-Integration), 60 | and information describing how to flag [Bugs & Features](https://github.com/caitiecollins/treeWAS/wiki/6.-Bugs-&-Features). 61 | 62 | 63 | 64 | Once you have installed and loaded the *treeWAS* package, you can also find this information in the vignette. 
65 | To open the vignette from within R (recommended if any formatted elements are not rendering properly in the wiki), 66 | run `browseVignettes` and click on the `HTML` hyperlink: 67 | 68 | 69 | ```{r, eval=FALSE} 70 | browseVignettes("treeWAS") 71 | ``` 72 | 73 | You may also find useful tips and relevant discussions among the [Issues](https://github.com/caitiecollins/treeWAS/issues) posted by other users (including [Closed Issues](https://github.com/caitiecollins/treeWAS/issues?q=is%3Aissue+is%3Aclosed)). 74 | 75 | 76 | *** 77 | -------------------------------------------------------------------------------- /data/dist_0.01.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.01.rda -------------------------------------------------------------------------------- /data/dist_0.05.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.05.rda -------------------------------------------------------------------------------- /data/dist_0.1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.1.rda -------------------------------------------------------------------------------- /data/dist_0.2.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.2.rda -------------------------------------------------------------------------------- /data/dist_0.25.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.25.rda -------------------------------------------------------------------------------- /data/dist_0.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.rda -------------------------------------------------------------------------------- /data/phen.cont.rank.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.cont.rank.rda -------------------------------------------------------------------------------- /data/phen.cont.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.cont.rda -------------------------------------------------------------------------------- /data/phen.plot.col.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.plot.col.rda -------------------------------------------------------------------------------- /data/phen.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.rda -------------------------------------------------------------------------------- /data/phen.reconstruction.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.reconstruction.rda 
-------------------------------------------------------------------------------- /data/snps.assoc.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/snps.assoc.rda -------------------------------------------------------------------------------- /data/snps.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/snps.rda -------------------------------------------------------------------------------- /data/snps.reconstruction.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/snps.reconstruction.rda -------------------------------------------------------------------------------- /data/tree.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/tree.rda -------------------------------------------------------------------------------- /data/treeWAS.example.out.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/treeWAS.example.out.rda -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite the treeWAS package:") 2 | 3 | ## Method reference: 4 | citEntry( 5 | entry="Article", 6 | title = "A phylogenetic method to perform genome-wide association studies in microbes that accounts for population structure and recombination", 7 | journal= "PLOS Computational Biology", 8 
| year = "2018", 9 | author = c(person(c("Caitlin"), "Collins"), 10 | person(c("Xavier"), "Didelot")), 11 | volume = "14", 12 | number = "2", 13 | pages = "1-21", 14 | doi = "10.1371/journal.pcbi.1005958", 15 | textVersion = "Collins C, Didelot X (2018). 'A phylogenetic method to perform genome-wide association studies in 16 | microbes that accounts for population structure and recombination.' PLOS Computational Biology, 17 | *14*(2), 1-21. doi:10.1371/journal.pcbi.1005958 ." 18 | ) 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /man/asr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reconstruct.R 3 | \name{asr} 4 | \alias{asr} 5 | \title{Ancestral state reconstruction} 6 | \usage{ 7 | asr( 8 | var, 9 | tree, 10 | type = c("parsimony", "ML", "ace"), 11 | method = c("discrete", "continuous"), 12 | unique.cols = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{var}{Either a matrix or a vector containing the state of a variable (eg. SNPs or a phenotype) 17 | for all individuals (ie. 
for all terminal nodes in the tree).} 18 | 19 | \item{tree}{A phylo object containing the tree representing the ancestral relationships 20 | between the individuals for which snps and phen are known.} 21 | 22 | \item{type}{A character string specifying whether ancestral state reconstruction should be 23 | performed by \code{parsimony} or \code{ML} (as performed by the \code{ace} function in package \emph{ape}).} 24 | 25 | \item{method}{A character string specifying the type of ASR method to implement, 26 | either \code{'discrete'} or \code{'continuous'} (only used if \code{type} is set to "ML").} 27 | 28 | \item{unique.cols}{A logical indicating whether only unique column patterns are present in \code{var}, 29 | if \code{var} is a matrix (if so (\code{TRUE}), a time-consuming step can be skipped); 30 | by default, \code{FALSE}.} 31 | } 32 | \value{ 33 | Depending on the dimensions of the input \code{var} object, 34 | either a matrix or a vector containing \emph{both} the known states 35 | of the variable at the terminal nodes (in positions 1:Nterminal) and the 36 | inferred states at internal nodes (in positions (Nterminal+1):Ntotal). 37 | } 38 | \description{ 39 | Reconstruct the ancestral states of a vector or matrix object by using either 40 | parsimony or maximum-likelihood methods to infer the states 41 | at the internal nodes of a phylogenetic tree. 
42 | } 43 | \author{ 44 | Caitlin Collins \email{caitiecollins@gmail.com} 45 | } 46 | -------------------------------------------------------------------------------- /man/assoc.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get.sig.snps.R 3 | \name{assoc.test} 4 | \alias{assoc.test} 5 | \title{Run a test of association between SNPs and a phenotype.} 6 | \usage{ 7 | assoc.test( 8 | snps, 9 | phen, 10 | tree = NULL, 11 | test = c("terminal", "simultaneous", "subsequent"), 12 | correct.prop = FALSE, 13 | categorical = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{snps}{A matrix containing the real snps.} 18 | 19 | \item{phen}{A factor or vector containing the phenotype (only allowed to contain two levels for now).} 20 | 21 | \item{test}{A character string or vector containing one or more of the following available tests of association: 22 | "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the first three tests are run. 23 | See details for more information on what these tests do and when they may be appropriate.} 24 | } 25 | \description{ 26 | Run one of five tests of association between each column of a SNPs matrix and a phenotype 27 | (some tests only implemented for \emph{binary} SNPs and phenotype). 
28 | } 29 | \author{ 30 | Caitlin Collins \email{caitiecollins@gmail.com} 31 | } 32 | -------------------------------------------------------------------------------- /man/beeswarmPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/beeswarm.plot.R 3 | \name{beeswarmPlot} 4 | \alias{beeswarmPlot} 5 | \title{Beeswarm-and-Box-Plot.} 6 | \usage{ 7 | beeswarmPlot( 8 | y = "sensitivity", 9 | x = "test", 10 | df, 11 | y.lab = NULL, 12 | pt.size = 4, 13 | x.text = FALSE, 14 | x.text.size = 15, 15 | y.text.size = 15, 16 | y.title.size = 20, 17 | y.lim = c(-0.002, 1.02), 18 | mean = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{y}{A character string specifying the label of the (numeric) column 23 | in data frame \code{df} to be plotted along the y-axis.} 24 | } 25 | \description{ 26 | Wrapper combining the beeswarm and box plot functions from packages \code{beeswarm} and \code{ggplot2}. 
27 | } 28 | \author{ 29 | Caitlin Collins \email{caitiecollins@gmail.com} 30 | } 31 | -------------------------------------------------------------------------------- /man/coalescent.sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coalescent.sim.R 3 | \name{coalescent.sim} 4 | \alias{coalescent.sim} 5 | \title{Simulate a tree, phenotype, and genetic data.} 6 | \usage{ 7 | coalescent.sim( 8 | n.ind = 100, 9 | n.snps = 10000, 10 | n.subs = 1, 11 | n.snps.assoc = 0, 12 | assoc.prob = 100, 13 | n.phen.subs = 15, 14 | phen = NULL, 15 | plot = TRUE, 16 | heatmap = FALSE, 17 | reconstruct = FALSE, 18 | dist.dna.model = "JC69", 19 | grp.min = 0.25, 20 | row.names = TRUE, 21 | set = 1, 22 | tree = NULL, 23 | coaltree = TRUE, 24 | s = 20, 25 | af = 10, 26 | filename.plot = NULL, 27 | seed = NULL 28 | ) 29 | } 30 | \arguments{ 31 | \item{n.ind}{An integer specifying the number of individual genomes to simulate 32 | (ie. 
the number of terminal nodes in the tree).} 33 | 34 | \item{n.snps}{An integer specifying the number of genetic loci to simulate.} 35 | 36 | \item{n.subs}{Either an integer or a vector (containing a distribution) that is 37 | used to determine the number of substitutions 38 | to occur on the phylogenetic tree for each genetic locus (see details).} 39 | 40 | \item{n.snps.assoc}{An optional integer specifying the number of genetic loci to be simulated as associated with the phenotype.} 41 | 42 | \item{assoc.prob}{An optional integer (> 0, <= 100) specifying the strength of the 43 | association between the n.snps.assoc loci and the phenotype (see details).} 44 | 45 | \item{n.phen.subs}{An integer specifying the expected number of phenotypic 46 | substitutions to occur on the phylogenetic tree (through the same process as 47 | the n.subs parameter when n.subs is an integer (see details)).} 48 | 49 | \item{phen}{An optional vector containing a phenotype for each of the 50 | n.ind individuals if no phenotypic simulation is desired.} 51 | 52 | \item{plot}{A logical indicating whether to generate a plot of the phylogenetic tree (\code{TRUE}, the default) or not (\code{FALSE}).} 53 | 54 | \item{heatmap}{A logical indicating whether to produce a heatmap of the genetic distance 55 | between the simulated genomes of the n.ind individuals.} 56 | 57 | \item{reconstruct}{Either a logical indicating whether to attempt to reconstruct 58 | a phylogenetic tree using the simulated genetic data, or one of c("UPGMA", "nj", "ml") 59 | to specify that tree reconstruction is desired by one of these three methods 60 | (Unweighted Pair Group Method with Arithmetic Mean, Neighbour-Joining, Maximum-Likelihood).} 61 | 62 | \item{dist.dna.model}{A character string specifying the type of model to use in reconstructing the phylogenetic tree for 63 | calculating the genetic distance between individual genomes, only used if \code{tree} is 64 | a character string (see ?dist.dna).} 65 | 66 | \item{grp.min}{An optional number between 0.1 and 0.9 to 
control the proportional size of the smaller phenotypic group.} 67 | 68 | \item{row.names}{An optional vector containing row names for the individuals to be simulated.} 69 | 70 | \item{set}{An integer (1, 2, or 3) required to select the method of generating associated loci if \code{n.snps.assoc} is not zero.} 71 | 72 | \item{coaltree}{A logical indicating whether to generate a coalescent tree (\code{TRUE}, the default), 73 | or an rtree-type tree (\code{FALSE}, see ?rtree).} 74 | 75 | \item{s}{If \code{set} is 3, the \code{s} parameter controls a baseline number of substitutions to be 76 | experienced by the phenotype and associated loci: by default, 20.} 77 | 78 | \item{af}{If \code{set} is 3, the \code{af} parameter provides an association factor, 79 | controlling the preference for association over non-association at associated loci: by default, 10 (for a 10x preference).} 80 | 81 | \item{filename.plot}{An optional character string denoting the file location for saving any plots produced; else \code{NULL}.} 82 | 83 | \item{seed}{An optional integer to control the pseudo-randomisation process and allow for identical repeat runs of the function; 84 | else \code{NULL}.} 85 | } 86 | \description{ 87 | This function allows the user to simulate a phylogenetic tree, as well as 88 | phenotypic and genetic data, including associated and unassociated loci. 89 | } 90 | \details{ 91 | \strong{Homoplasy Distribution} 92 | 93 | The homoplasy distribution contains the number of substitutions per site. 94 | 95 | If the value of the \code{n.subs} parameter is set to an integer, this integer is 96 | used as the parameter of a Poisson distribution from which the number of substitutions to 97 | occur on the phylogenetic tree is drawn for each of the \code{n.snps} simulated genetic loci. 98 | 99 | The \code{n.subs} argument can also be used to provide a distribution 100 | to define the number of substitutions per site. 
101 | 102 | It must be in the form of a \emph{named} vector (or table), or a vector in which the \emph{i}'th element 103 | contains the number of \emph{loci} that have been estimated to undergo \emph{i} substitutions on the tree. 104 | The vector must be of length \emph{max n.subs}, and "empty" indices must contain zeros. 105 | For example: the vector \code{n.subs = c(1833, 642, 17, 6, 1, 0, 0, 1)} 106 | could be used to define the homoplasy distribution for a dataset with 2500 loci, 107 | where the maximum number of substitutions to be undergone on the tree by any locus is 8, 108 | and no loci undergo either 6 or 7 substitutions. 109 | 110 | 111 | \strong{Association Probability} 112 | 113 | The \code{assoc.prob} parameter is only functional when \code{set} is set to 1. 114 | If so, \code{assoc.prob} controls the strength of association through a process analogous to dilution. 115 | All \code{n.snps.assoc} loci are initially simulated to undergo a substitution 116 | every time the phenotype undergoes a substitution (ie. perfect association). 117 | The assoc.prob parameter then acts like a dilution factor, removing \code{(100 - assoc.prob)\%} 118 | of the substitutions that occurred during simulation under perfect association. 
119 | } 120 | \examples{ 121 | \dontrun{ 122 | ## load example homoplasy distribution 123 | data(dist_0) 124 | str(dist_0) 125 | 126 | ## simulate a matrix with 10 associated loci: 127 | dat <- coalescent.sim(n.ind = 100, 128 | n.snps = 1000, 129 | n.subs = dist_0, 130 | n.snps.assoc = 10, 131 | assoc.prob = 90, 132 | n.phen.subs = 15, 133 | phen = NULL, 134 | plot = TRUE, 135 | heatmap = FALSE, 136 | reconstruct = FALSE, 137 | dist.dna.model = "JC69", 138 | grp.min = 0.25, 139 | row.names = NULL, 140 | coaltree = TRUE, 141 | s = NULL, 142 | af = NULL, 143 | filename.plot = NULL, 144 | set = 1, 145 | seed = 1) 146 | 147 | ## examine output: 148 | str(dat) 149 | 150 | ## isolate elements of output: 151 | snps <- dat$snps 152 | phen <- dat$phen 153 | snps.assoc <- dat$snps.assoc 154 | tree <- dat$tree 155 | } 156 | } 157 | \author{ 158 | Caitlin Collins \email{caitiecollins@gmail.com} 159 | } 160 | -------------------------------------------------------------------------------- /man/coalescent.tree.sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coalescent.tree.sim.R 3 | \name{coalescent.tree.sim} 4 | \alias{coalescent.tree.sim} 5 | \title{Simulate a coalescent tree.} 6 | \usage{ 7 | coalescent.tree.sim(n.ind = 100, seed = NULL) 8 | } 9 | \arguments{ 10 | \item{n.ind}{An integer specifying the number of terminal nodes desired.} 11 | 12 | \item{seed}{An optional integer controlling the pseudo-random process underlying the tree generation.} 13 | } 14 | \description{ 15 | Simulate a phylogenetic tree with \code{n.ind} terminal nodes; the optional \code{seed} controls the pseudo-random process underlying the tree generation. 
16 | } 17 | \examples{ 18 | 19 | ## basic use of fn 20 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1) 21 | 22 | ## plot output 23 | plot(tree) 24 | 25 | } 26 | \author{ 27 | Caitlin Collins \email{caitiecollins@gmail.com} 28 | } 29 | -------------------------------------------------------------------------------- /man/dist_0.01.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{dist_0.01} 5 | \alias{dist_0.01} 6 | \title{Nsubs per site with limited recombination (R = 0.01; M = 0.01).} 7 | \format{ 8 | A named vector of length 15. 9 | } 10 | \usage{ 11 | data(dist_0.01) 12 | } 13 | \description{ 14 | This vector contains a homoplasy distribution, 15 | representing the relative number of substitutions per site 16 | that occurred along a phylogenetic tree when evolution was 17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.01 18 | (\emph{r/m} = 1). 19 | } 20 | \details{ 21 | A per-site mutation rate of M = 0.01 22 | indicates that each site, on average, undergoes 0.01 23 | substitutions due to mutation along the phylogenetic tree. 24 | A per-site recombination rate of R = 0.01 25 | indicates that each site, on average, undergoes 0.01 26 | substitutions per site due to within-species recombination. 27 | 28 | Each element of the vector indicates the number of genetic loci 29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i). 30 | 31 | If visualised as a bar plot (with \code{barplot(dist_0.01)}), 32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram 33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site), 34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree). 
35 | } 36 | \author{ 37 | Caitlin Collins \email{caitiecollins@gmail.com} 38 | } 39 | \keyword{data} 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /man/dist_0.05.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{dist_0.05} 5 | \alias{dist_0.05} 6 | \title{Nsubs per site with recombination (R = 0.05; M = 0.01).} 7 | \format{ 8 | A named vector of length 26. 9 | } 10 | \usage{ 11 | data(dist_0.05) 12 | } 13 | \description{ 14 | This vector contains a homoplasy distribution, 15 | representing the relative number of substitutions per site 16 | that occurred along a phylogenetic tree when evolution was 17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.05 18 | (\emph{r/m} = 5). 19 | } 20 | \details{ 21 | A per-site mutation rate of M = 0.01 22 | indicates that each site, on average, undergoes 0.01 23 | substitutions due to mutation along the phylogenetic tree. 24 | A per-site recombination rate of R = 0.05 25 | indicates that each site, on average, undergoes 0.05 26 | substitutions per site due to within-species recombination. 27 | 28 | Each element of the vector indicates the number of genetic loci 29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i). 30 | 31 | If visualised as a bar plot (with \code{barplot(dist_0.05)}), 32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram 33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site), 34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree). 
35 | } 36 | \author{ 37 | Caitlin Collins \email{caitiecollins@gmail.com} 38 | } 39 | \keyword{data} 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /man/dist_0.1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{dist_0.1} 5 | \alias{dist_0.1} 6 | \title{Nsubs per site with recombination (R = 0.1; M = 0.01).} 7 | \format{ 8 | A named vector of length 31. 9 | } 10 | \usage{ 11 | data(dist_0.1) 12 | } 13 | \description{ 14 | This vector contains a homoplasy distribution, 15 | representing the relative number of substitutions per site 16 | that occurred along a phylogenetic tree when evolution was 17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.1 18 | (\emph{r/m} = 10). 19 | } 20 | \details{ 21 | A per-site mutation rate of M = 0.01 22 | indicates that each site, on average, undergoes 0.01 23 | substitutions due to mutation along the phylogenetic tree. 24 | A per-site recombination rate of R = 0.1 25 | indicates that each site, on average, undergoes 0.1 26 | substitutions per site due to within-species recombination. 27 | 28 | Each element of the vector indicates the number of genetic loci 29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i). 30 | 31 | If visualised as a bar plot (with \code{barplot(dist_0.1)}), 32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram 33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site), 34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree). 
35 | } 36 | \author{ 37 | Caitlin Collins \email{caitiecollins@gmail.com} 38 | } 39 | \keyword{data} 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /man/dist_0.2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{dist_0.2} 5 | \alias{dist_0.2} 6 | \title{Nsubs per site with considerable recombination (R = 0.2; M = 0.01).} 7 | \format{ 8 | A named vector of length 30. 9 | } 10 | \usage{ 11 | data(dist_0.2) 12 | } 13 | \description{ 14 | This vector contains a homoplasy distribution, 15 | representing the relative number of substitutions per site 16 | that occurred along a phylogenetic tree when evolution was 17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.2 18 | (\emph{r/m} = 20). 19 | } 20 | \details{ 21 | A per-site mutation rate of M = 0.01 22 | indicates that each site, on average, undergoes 0.01 23 | substitutions due to mutation along the phylogenetic tree. 24 | A per-site recombination rate of R = 0.2 25 | indicates that each site, on average, undergoes 0.2 26 | substitutions per site due to within-species recombination. 27 | 28 | Each element of the vector indicates the number of genetic loci 29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i). 30 | 31 | If visualised as a bar plot (with \code{barplot(dist_0.2)}), 32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram 33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site), 34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree). 
35 | } 36 | \author{ 37 | Caitlin Collins \email{caitiecollins@gmail.com} 38 | } 39 | \keyword{data} 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /man/dist_0.25.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{dist_0.25} 5 | \alias{dist_0.25} 6 | \title{Nsubs per site with considerable recombination (R = 0.25; M = 0.01).} 7 | \format{ 8 | A named vector of length 34. 9 | } 10 | \usage{ 11 | data(dist_0.25) 12 | } 13 | \description{ 14 | This vector contains a homoplasy distribution, 15 | representing the relative number of substitutions per site 16 | that occurred along a phylogenetic tree when evolution was 17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.25 18 | (\emph{r/m} = 25). 19 | } 20 | \details{ 21 | A per-site mutation rate of M = 0.01 22 | indicates that each site, on average, undergoes 0.01 23 | substitutions due to mutation along the phylogenetic tree. 24 | A per-site recombination rate of R = 0.25 25 | indicates that each site, on average, undergoes 0.25 26 | substitutions per site due to within-species recombination. 27 | 28 | Each element of the vector indicates the number of genetic loci 29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i). 30 | 31 | If visualised as a bar plot (with \code{barplot(dist_0.25)}), 32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram 33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site), 34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree). 
35 | } 36 | \author{ 37 | Caitlin Collins \email{caitiecollins@gmail.com} 38 | } 39 | \keyword{data} 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /man/dist_0.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{dist_0} 5 | \alias{dist_0} 6 | \title{Nsubs per site with no recombination (R = 0; M = 0.01).} 7 | \format{ 8 | A named vector of length 4. 9 | } 10 | \usage{ 11 | data(dist_0) 12 | } 13 | \description{ 14 | This vector contains a homoplasy distribution, 15 | representing the relative number of substitutions per site 16 | that occurred along a phylogenetic tree when evolution was 17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0 18 | (\emph{r/m} = 0). 19 | } 20 | \details{ 21 | A per-site mutation rate of M = 0.01 22 | indicates that each site, on average, undergoes 0.01 23 | substitutions due to mutation along the phylogenetic tree. 24 | A per-site recombination rate of R = 0 25 | indicates that no recombination occurred. 26 | 27 | Each element of the vector indicates the number of genetic loci 28 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i). 29 | 30 | If visualised as a bar plot (with \code{barplot(dist_0)}), 31 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram 32 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site), 33 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree). 
34 | } 35 | \author{ 36 | Caitlin Collins \email{caitiecollins@gmail.com} 37 | } 38 | \keyword{data} 39 | \keyword{datasets} 40 | -------------------------------------------------------------------------------- /man/fwd.coalescent.sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fwd.coalescent.sim.R 3 | \name{fwd.coalescent.sim} 4 | \alias{fwd.coalescent.sim} 5 | \title{Short one-phrase description.} 6 | \usage{ 7 | fwd.coalescent.sim( 8 | n.ind = 100, 9 | n.snps = 10000, 10 | n.subs = 1, 11 | n.snps.assoc = 10, 12 | n.subs.assoc = 15, 13 | p = 1, 14 | heatmap = FALSE, 15 | reconstruct = FALSE, 16 | dist.dna.model = "JC69", 17 | seed = 1 18 | ) 19 | } 20 | \arguments{ 21 | \item{n.ind}{An integer specifying the number of individual genomes to simulate 22 | (ie. the number of terminal nodes in the tree).} 23 | 24 | \item{n.snps}{An integer specifying the number of genetic loci to simulate.} 25 | 26 | \item{n.subs}{Either an integer or a vector (containing a distribution) that is 27 | used to determine the number of substitutions 28 | to occur on the phylogenetic tree for each genetic locus (see details).} 29 | 30 | \item{n.snps.assoc}{An optional integer specifying the number of genetic loci} 31 | 32 | \item{heatmap}{A logical indicating whether to produce a heatmap of the genetic distance 33 | between the simulated genomes of the n.ind individuals.} 34 | 35 | \item{reconstruct}{Either a logical indicating whether to attempt to reconstruct 36 | a phylogenetic tree using the simulated genetic data, or one of c("UPGMA", "nj", "ml") 37 | to specify that tree reconstruction is desired by one of these three methods 38 | (Unweighted Pair Group Method with Arithmetic Mean, Neighbour-Joining, Maximum-Likelihood).} 39 | 40 | \item{seed}{An optional integer controlling the pseudo-random process of simulation. 
Two 41 | instances of coalescent.sim with the same seed and arguments will produce identical output.} 42 | 43 | \item{assoc.prob}{An optional integer (> 0, <= 100) specifying the strength of the 44 | association between the n.snps.assoc loci and the phenotype (see details).} 45 | 46 | \item{n.phen.subs}{An integer specifying the expected number of phenotypic 47 | substitutions to occur on the phylogenetic tree (through the same process as 48 | the n.subs parameter when n.subs is an integer (see details)).} 49 | 50 | \item{phen}{An optional vector containing a phenotype for each of the 51 | n.ind individuals if no phenotypic simulation is desired.} 52 | } 53 | \description{ 54 | Longer proper description of function... 55 | } 56 | \details{ 57 | #### n.subs #### 58 | If the value of the n.subs parameter is set to an integer, this integer is 59 | used as the parameter of a Poisson distribution from which the number of substitutions to 60 | occur on the phylogenetic tree is drawn for each of the n.snps simulated genetic loci. 61 | If n.subs is a vector containing a distribution, this is used directly (in proportion to n.snps) 62 | to define the number of substitutions per site. For example, if n.subs=c(3000, 900, 70, 20, 0, 10) 63 | and n.snps=8000, then 6000 simulated sites will undergo exactly 64 | one substitution somewhere on the phylogenetic tree, 1800 will undergo two, 65 | 140 three, 40 four, 0 five, and 20 six. 66 | #### assoc.prob #### 67 | The assoc.prob parameter controls the strength of association through a process analogous to dilution. 68 | All n.snps.assoc loci are initially simulated to undergo a substitution 69 | every time the phenotype undergoes a substitution (ie. perfect association). 70 | The assoc.prob parameter then acts like a dilution factor, removing (100 - assoc.prob)\% 71 | of the substitutions that occurred during simulation under perfect association. 
72 | } 73 | \author{ 74 | Caitlin Collins \email{caitiecollins@gmail.com} 75 | } 76 | -------------------------------------------------------------------------------- /man/fwd.phen.sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fwd.phen.sim.R 3 | \name{fwd.phen.sim} 4 | \alias{fwd.phen.sim} 5 | \title{Simulate a phenotype, from root to tips.} 6 | \usage{ 7 | fwd.phen.sim(snps.assoc, p = 1, tree = NULL) 8 | } 9 | \arguments{ 10 | \item{snps.assoc}{A matrix created by the \code{fwd.snp.sim} function, 11 | which indicates where genotypic substitutions occur on the tree at phenotypically-associated sites.} 12 | 13 | \item{p}{An integer specifying the probability of phenotypic substitution, 14 | given genotypic substitution (see details).} 15 | 16 | \item{tree}{A phylo object.} 17 | } 18 | \description{ 19 | [\emph{An exploratory function:}] Having already simulated a genotype, 20 | this function allows you to simulate an associated phenotype along the tree, from root to tips. 21 | } 22 | \details{ 23 | The parameter \code{p} controls the simulation of the phenotype by specifying 24 | the expected value of the number of phenotypic substitutions to occur on the tree provided, 25 | given that a genotypic substitution has occurred on a particular branch of the tree. 
26 | } 27 | \examples{ 28 | 29 | ## basic use of fn 30 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1) 31 | 32 | ## plot output 33 | plot(tree) 34 | 35 | } 36 | \author{ 37 | Caitlin Collins \email{caitiecollins@gmail.com} 38 | } 39 | -------------------------------------------------------------------------------- /man/fwd.snp.sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fwd.snp.sim.R 3 | \name{fwd.snp.sim} 4 | \alias{fwd.snp.sim} 5 | \title{Short one-phrase description.} 6 | \usage{ 7 | fwd.snp.sim( 8 | n.snps = 10000, 9 | n.subs = 1, 10 | n.snps.assoc = 0, 11 | n.subs.assoc = 15, 12 | tree = coalescent.tree.sim(100), 13 | heatmap = FALSE, 14 | reconstruct = FALSE, 15 | dist.dna.model = "JC69", 16 | seed = 1 17 | ) 18 | } 19 | \arguments{ 20 | \item{snps}{description.} 21 | } 22 | \description{ 23 | Longer proper description of function... 24 | } 25 | \author{ 26 | Caitlin Collins \email{caitiecollins@gmail.com} 27 | } 28 | -------------------------------------------------------------------------------- /man/get.ancestral.pars.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reconstruct.R 3 | \name{get.ancestral.pars} 4 | \alias{get.ancestral.pars} 5 | \title{Ancestral sequence reconstruction via parsimony} 6 | \usage{ 7 | get.ancestral.pars(var, tree, unique.cols = FALSE) 8 | } 9 | \arguments{ 10 | \item{var}{A matrix or vector containing a variable whose state at ancestral nodes we want to infer.} 11 | 12 | \item{tree}{A phylo object containing a phylogenetic tree whose tips contain the same individuals as are 13 | in the elements of \code{var}, if \code{var} is a vector, 14 | or in the rows of \code{var}, if \code{var} is a matrix.} 15 | } 16 | \description{ 17 | A wrapper for the \code{ancestral.pars} 
function from \emph{phangorn}. Can perform 18 | parsimonious ASR for variables in matrix or vector form. 19 | } 20 | \details{ 21 | Note that the (row)names of \code{var} should match the tip.labels of \code{tree}. 22 | } 23 | \author{ 24 | Caitlin Collins \email{caitiecollins@gmail.com} 25 | } 26 | -------------------------------------------------------------------------------- /man/get.assoc.scores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get.sig.snps.R 3 | \name{get.assoc.scores} 4 | \alias{get.assoc.scores} 5 | \title{Get significant SNPs, according to a given test of association.} 6 | \usage{ 7 | get.assoc.scores( 8 | snps, 9 | snps.sim, 10 | phen, 11 | tree, 12 | test = "terminal", 13 | correct.prop = FALSE, 14 | categorical = FALSE, 15 | snps.reconstruction = NULL, 16 | snps.sim.reconstruction = NULL, 17 | phen.reconstruction = NULL, 18 | unique.cols = FALSE 19 | ) 20 | } 21 | \arguments{ 22 | \item{snps}{A matrix containing the real snps.} 23 | 24 | \item{snps.sim}{A matrix or list of matrices containing simulated snps.} 25 | 26 | \item{phen}{A factor or vector containing the phenotype (only allowed to contain two levels for now).} 27 | 28 | \item{tree}{A phylo object containing a phylogenetic tree in which the number of tips is equal to the 29 | length of \code{phen} and the number of rows of \code{snps} and \code{snps.sim}.} 30 | 31 | \item{test}{A character string or vector containing one or more of the following available tests of association: 32 | "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the terminal test is run 33 | (note that within treeWAS, the first three tests are run in a loop by default). 
34 | See details for more information on what these tests do and when they may be appropriate.} 35 | 36 | \item{correct.prop}{A logical indicating whether the \code{"terminal"} and \code{"subsequent"} tests will be corrected for 37 | phenotypic class imbalance. Recommended if the proportion of individuals varies significantly across 38 | the levels of the phenotype (if binary) or if the phenotype is skewed (if continuous). 39 | If \code{correct.prop} is \code{FALSE} (the default), 40 | the original versions of each test will be run as described in our 41 | \href{http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005958}{PLOS Computational Biology paper}. 42 | If \code{TRUE}, an alternate association metric (based on the phi correlation coefficient) is calculated 43 | across the terminal and all (internal and terminal) nodes, respectively.} 44 | 45 | \item{categorical}{A logical indicating whether \code{phen} should be treated as a nominal categorical variable 46 | whose unique values should be treated as levels rather than as meaningful numbers.} 47 | } 48 | \description{ 49 | Identify which SNPs are deemed to be significantly associated with a phenotype, 50 | according to a given test of association and p-value. 51 | (Serves as the treeWAS association testing function; 52 | runs the \code{assoc.test} function internally.) 
53 | } 54 | \author{ 55 | Caitlin Collins \email{caitiecollins@gmail.com} 56 | } 57 | -------------------------------------------------------------------------------- /man/get.binary.snps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get.binary.snps} 4 | \alias{get.binary.snps} 5 | \title{Reduce a genetic data matrix to only necessary columns.} 6 | \usage{ 7 | get.binary.snps(snps, force = FALSE) 8 | } 9 | \arguments{ 10 | \item{snps}{A genetic data matrix.} 11 | } 12 | \description{ 13 | Function to reduce a genetic data matrix containing multiple columns per locus 14 | to one column for each binary locus and N columns for each N-allelic non-binary locus. 15 | } 16 | \details{ 17 | This function identifies the number of alleles at each locus by assuming that 18 | the allele of each column is contained in the last two characters of each column name. 19 | We recommend that the columns of \code{snps} be labelled using the following four suffixes: 20 | ".a", ".c", ".g", ".t" (e.g., "Locus_123243.a", "Locus_123243.g"). 21 | If you are using an alternative naming convention, 22 | but the allele is also always being denoted using the last two characters 23 | (e.g., "Locus_123243_1", "Locus_123243_2"), 24 | the function will still work if you set the argument \code{force = TRUE}. 25 | Please also be careful not to accidentally remove any purposeful duplications with repeated names; 26 | for example, if you have deliberately duplicated unique columns 27 | (e.g., by expanding according to an index returned by ClonalFrameML). 
28 | } 29 | \author{ 30 | Caitlin Collins \email{caitiecollins@gmail.com} 31 | } 32 | -------------------------------------------------------------------------------- /man/get.fitch.n.mts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fitch.R 3 | \name{get.fitch.n.mts} 4 | \alias{get.fitch.n.mts} 5 | \title{Calculate parsimony scores.} 6 | \usage{ 7 | get.fitch.n.mts(x, tree, snps = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric matrix or vector containing two unique values with row.names matching tree tip.labels.} 11 | 12 | \item{tree}{A phylo object.} 13 | } 14 | \description{ 15 | Determine parsimony scores for all genetic loci, or a phenotypic variable, along a given tree. 16 | An extension of the fitch function available in package phangorn. 17 | } 18 | \examples{ 19 | \dontrun{ 20 | 21 | ## generate a tree 22 | tree <- ape::rtree(100) 23 | ## generate snps, a matrix of 0s and 1s 24 | snps <- matrix(sample(c(0,1),100000,TRUE), nrow=100) 25 | row.names(snps) <- tree$tip.label 26 | 27 | ## run function 28 | out <- get.fitch.n.mts(x=snps, tree) 29 | 30 | ## examine output 31 | str(out) 32 | table(out) 33 | hist(out) 34 | } 35 | 36 | } 37 | \author{ 38 | Caitlin Collins \email{caitiecollins@gmail.com} 39 | } 40 | -------------------------------------------------------------------------------- /man/get.original.loci.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readCFML.R 3 | \name{get.original.loci} 4 | \alias{get.original.loci} 5 | \title{\code{(read.CFML+)} Get original sequence positions of polymorphic loci.} 6 | \usage{ 7 | get.original.loci( 8 | seqs, 9 | dat, 10 | sig.snps.names, 11 | n.bp = 50, 12 | suff.length = 2, 13 | csv = TRUE, 14 | csv.prefix = NULL, 15 | NA.thresh = 0.2 16 | ) 17 | } 18 | \arguments{ 
19 | \item{seqs}{A \code{DNAbin} object containing the original sequences 20 | input into ClonalFrameML (see details).} 21 | 22 | \item{dat}{An object containing the output of the \code{read.CFML} function.} 23 | 24 | \item{sig.snps.names}{A character vector containing the names of 25 | polymorphic loci whose original sequence positions you desire (see details).} 26 | 27 | \item{n.bp}{An integer specifying the desired length of the flanking 28 | sequence to be returned; by default, 50 (see details).} 29 | 30 | \item{suff.length}{An integer specifying the suffix length 31 | of \code{snps} elements; by default, 2 (see details).} 32 | 33 | \item{csv}{A logical indicating whether to save the results as a CSV file.} 34 | 35 | \item{csv.prefix}{An optional character vector specifying a directory and 36 | filename prefix for the CSV file (if \code{csv=TRUE}); default name/suffix, "sig_loci.csv". 37 | \emph{Please be careful: Any existing file of that name will be overwritten!}} 38 | 39 | \item{NA.thresh}{A number between 0 and 1 indicating the max allowable 40 | proportion of NAs that the output sequence fragments can contain. 41 | (if a sequence fragment from row 1 exceeds this threshold, 42 | a sufficiently complete sequence fragment will be sought in subsequent rows); by default, 0.2.} 43 | } 44 | \value{ 45 | \code{get.original.loci} returns a list containing: 46 | \enumerate{ 47 | \item \code{loci}: The original sequence positions for all polymorphic loci in \code{seqs}. 48 | \item \code{loci.sig}: The original sequence positions for all polymorphic loci in \code{sig.snps.names}. 49 | \item \code{seq.sig}: A list of length \code{sig.snps.names} containing sequence fragments of length \code{n.bp}. 50 | } 51 | } 52 | \description{ 53 | If you ran \code{read.CFML} on ClonalFrameML output before running \code{treeWAS}, 54 | this function can be used to identify the original sequence positions of your polymorphic loci. 
55 | E.g., If \code{treeWAS} identified loci "1417.a" and "2017.g" as significant, \code{get.original.loci} 56 | can identify corresponding sequence positions "1165743" and "1741392" and return 57 | flanking sequence segments. 58 | } 59 | \details{ 60 | \strong{seqs} must contain ClonalFrameML \emph{input*}, 61 | which can be read in from fasta with \code{read.dna("FILENAME.fasta", format="fasta")} 62 | (*not the ClonalFrameML output file "ML_sequence.fasta" or the \code{seqs} element of \code{read.CFML} output).\cr\cr 63 | \strong{sig.snps.names} can contain any set of \code{colnames(snps)}, for example, 64 | the set of significant loci identified by \code{treeWAS} (\code{out$treeWAS.combined$treeWAS.combined}).\cr\cr 65 | \strong{n.bp} specifies the total length of flanking sequence 66 | (drawn from the first row of \code{seqs} only), 67 | half of which will be on either side of each locus in \code{sig.snps.names}. 68 | Each such sequence will be of total length \code{n.bp+1}, arranged (e.g., with \code{n.bp = 50}) as:\cr 69 | <---25bp---><---25bp--->.\cr\cr 70 | \strong{suff.length} tells the \code{removeLastN} function how many characters are used to specify 71 | the allele in \code{sig.snps.names} and \code{colnames(snps)}. For names of the form: 72 | "1234.a", \code{suff.length = 2} (note that the decimal counts as a character). 73 | If \code{snps} names are purely numeric with no alleles indicated 74 | (i.e., they already match names in \code{seqs}), then set \code{suff.length = 0}. 
75 | } 76 | \examples{ 77 | ## Example ## 78 | \dontrun{ 79 | fasta <- "./filename.fas" 80 | prefix <- "/filename.fas.out" 81 | 82 | ## read in original fasta sequence: 83 | seqs <- read.dna(fasta, format="fasta") 84 | 85 | ## load saved read.CFML output 86 | dat <- get(load(sprintf('\%s.read.CFML_dat.Rdata', prefix))) 87 | 88 | ## get sig snps from treeWAS results 89 | sig.snps.names <- out$treeWAS.combined$treeWAS.combined 90 | 91 | out <- get.original.loci(seqs, dat, sig.snps.names, n.bp=40, csv=T, csv.prefix="/filename") 92 | } 93 | 94 | } 95 | \author{ 96 | Caitlin Collins \email{caitiecollins@gmail.com} 97 | } 98 | -------------------------------------------------------------------------------- /man/get.score3.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subsequent.test.R 3 | \name{get.score3} 4 | \alias{get.score3} 5 | \title{Short one-phrase description.} 6 | \usage{ 7 | get.score3(Pa, Pd, Sa, Sd, l = NULL) 8 | } 9 | \arguments{ 10 | \item{Pa}{A numeric value containing either the state, 11 | or the probability of the state, of the phenotype at a given \emph{ancestral} node.} 12 | 13 | \item{Pd}{A numeric value containing either the state, 14 | or the probability of the state, of the phenotype at a given \emph{descendant} node.} 15 | 16 | \item{Sa}{A numeric value containing either the state, 17 | or the probability of the state, of SNPi at a given \emph{ancestral} node.} 18 | 19 | \item{Sd}{A numeric value containing either the state, 20 | or the probability of the state, of SNPi at a given \emph{descendant} node.} 21 | 22 | \item{l}{A numeric value specifying the length of the branch in the phylogenetic tree 23 | that joins the ancestral and descendant node.} 24 | } 25 | \description{ 26 | Longer proper description of function... 
27 | } 28 | \examples{ 29 | ## Example ## 30 | \dontrun{ 31 | ## basic use of fn 32 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1) 33 | } 34 | } 35 | \author{ 36 | Caitlin Collins \email{caitiecollins@gmail.com} 37 | } 38 | -------------------------------------------------------------------------------- /man/get.sig.snps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get.sig.snps.R 3 | \name{get.sig.snps} 4 | \alias{get.sig.snps} 5 | \title{Get significant SNPs, according to a given test of association.} 6 | \usage{ 7 | get.sig.snps( 8 | corr.dat, 9 | corr.sim, 10 | snps.names, 11 | test = "terminal", 12 | p.value = 0.01, 13 | p.value.correct = "bonf", 14 | p.value.by = "count" 15 | ) 16 | } 17 | \arguments{ 18 | \item{corr.dat}{A vector containing the association score values, for a given association test, for the real data.} 19 | 20 | \item{corr.sim}{A vector containing the association score values, for a given association test, for the simulated data.} 21 | 22 | \item{snps.names}{The column names of the original \code{snps} matrix from which the association score values 23 | in \code{corr.dat} were derived.} 24 | 25 | \item{test}{A character string or vector containing one or more of the following available tests of association: 26 | "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the terminal test is run 27 | (note that within treeWAS, the first three tests are run in a loop by default). 28 | See details for more information on what these tests do and when they may be appropriate.} 29 | 30 | \item{p.value}{A single number specifying the p.value below which correlations are deemed to be 'significant'.} 31 | 32 | \item{p.value.correct}{Specify if/how to correct for multiple testing: 33 | either FALSE, or one of 'bonf' or 'fdr' (indicating, respectively, 34 | the Bonferroni and False Discovery Rate corrections). 
By default, 'bonf' is selected.} 35 | 36 | \item{p.value.by}{Specify how to determine the location of the p.value threshold: 37 | either 'count' or 'density' (indicating, respectively, that the p.value threshold should 38 | be determined by exact count or with the use of a density function).} 39 | } 40 | \description{ 41 | Identify which SNPs are deemed to be significantly associated with a phenotype, 42 | according to a given test of association and p-value. 43 | (Serves as the treeWAS association testing function; 44 | runs the \code{assoc.test} function internally.) 45 | } 46 | \author{ 47 | Caitlin Collins \email{caitiecollins@gmail.com} 48 | } 49 | -------------------------------------------------------------------------------- /man/get.tip.order.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get.tip.order} 4 | \alias{get.tip.order} 5 | \title{Get the order of the tip labels of a phylogenetic tree as plotted.} 6 | \usage{ 7 | get.tip.order(tree, original.format = TRUE) 8 | } 9 | \arguments{ 10 | \item{tree}{An object of class phylo containing a tree 11 | whose tip order is desired to be known.} 12 | 13 | \item{original.format}{A logical, indicating whether to use the original 14 | format of this function (kept for consistency's sake) or the new format. 15 | (For now, if you find one isn't giving you sensible output, 16 | please try changing this argument.)} 17 | } 18 | \description{ 19 | Longer proper description of function... 
20 | } 21 | \author{ 22 | Caitlin Collins \email{caitiecollins@gmail.com} 23 | } 24 | -------------------------------------------------------------------------------- /man/get.unique.matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{get.unique.matrix} 4 | \alias{get.unique.matrix} 5 | \title{Get unique rows/columns of a matrix with an index vector.} 6 | \usage{ 7 | get.unique.matrix(data, MARGIN = 2, silent = TRUE) 8 | } 9 | \arguments{ 10 | \item{data}{A matrix or data.frame, potentially containing 11 | non-unique patterns in its rows or columns.} 12 | 13 | \item{MARGIN}{A single integer specifying the array margin to be held fixed. 14 | (To get unique \emph{rows}, select \code{MARGIN} = 1; 15 | for unique \emph{columns}, select \code{MARGIN} = 2.)} 16 | } 17 | \value{ 18 | A list with the following elements: 19 | \itemize{ 20 | \item \code{index}: An index vector containing the indices (row numbers), 21 | in a matrix composed only of unique rows, 22 | to which each row in the original matrix maps. 23 | \item \code{unique.data}: A new matrix 24 | containing only the unique rows of the input matrix. 25 | } 26 | } 27 | \description{ 28 | A wrapper for the \code{table.matrix} function that assigns consecutive 29 | row or column names to the output matrix's unique rows or columns. 30 | } 31 | \details{ 32 | An extension of the base \code{unique.matrix} function, 33 | \code{get.unique.matrix} returns a unique matrix 34 | (by removing duplicate rows or columns), as well as 35 | an index vector that maps each row/column in the original matrix 36 | to the corresponding unique row or column in the deduplicated unique matrix. 
37 | } 38 | \author{ 39 | Caitlin Collins \email{caitiecollins@gmail.com} 40 | } 41 | -------------------------------------------------------------------------------- /man/ggplotbg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{ggplotbg} 4 | \alias{ggplotbg} 5 | \title{Mimic ggplot2 Background} 6 | \usage{ 7 | ggplotbg( 8 | bg = transp("lightgray", 0.5), 9 | x.ax = FALSE, 10 | y.ax = FALSE, 11 | box = TRUE, 12 | grid = TRUE, 13 | grid.col = "white", 14 | grid.nx = NULL, 15 | grid.ny = NULL, 16 | grid.lwd = 1, 17 | grid.lty = 1 18 | ) 19 | } 20 | \arguments{ 21 | \item{bg}{The background colour, by default ``lightgray'' with 50\% transparency.} 22 | 23 | \item{x.ax}{A logical specifying whether to re-draw the x-axis.} 24 | 25 | \item{y.ax}{A logical specifying whether to re-draw the y-axis.} 26 | 27 | \item{box}{A logical specifying whether to draw a box around the plotting area.} 28 | 29 | \item{grid}{A logical specifying whether to draw a grid across the background within the plotting area.} 30 | 31 | \item{grid.col}{The color of the gridlines, ``white'' by default. 
Only used if grid is set to TRUE.} 32 | 33 | \item{grid.nx}{An optional integer to specify the number of gridlines to be drawn along the x-axis.} 34 | 35 | \item{grid.ny}{An optional integer to specify the number of gridlines to be drawn along the y-axis.} 36 | 37 | \item{grid.lwd}{An integer specifying the lwd (line weight) of the gridlines; by default, set to 1.} 38 | 39 | \item{grid.lty}{An integer specifying the line type to be used for the gridlines; by default, set to 1 (i.e., solid lines).} 40 | } 41 | \description{ 42 | Get an imitation ggplot2-style background for plots made outside ggplot2. 43 | } 44 | \details{ 45 | This function must be sandwiched between two instances 46 | of the function used to generate the (foreground) plot 47 | to which you are hoping to add this background. 48 | \emph{Before} running the \code{ggplotbg} function, you need to run your plot function 49 | so that \code{ggplotbg} knows how to set the axes. 50 | \emph{After} running the \code{ggplotbg} function, you need to run your plot function 51 | again \emph{with the added argument} \code{add=TRUE} 52 | so that your plot can be overlaid on top of the background. 
53 | } 54 | \author{ 55 | Caitlin Collins \email{caitiecollins@gmail.com} 56 | } 57 | -------------------------------------------------------------------------------- /man/heatmap.DNAbin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/heatmap.DNAbin.R 3 | \name{heatmap.DNAbin} 4 | \alias{heatmap.DNAbin} 5 | \title{Short one-phrase description.} 6 | \usage{ 7 | heatmap.DNAbin(dna, dist.dna.model = "JC69") 8 | } 9 | \arguments{ 10 | \item{dna}{A DNAbin object.} 11 | 12 | \item{dist.dna.model}{A character string specifying the type of model to use in 13 | calculating the genetic distance between individual genomes (see ?dist.dna).} 14 | } 15 | \description{ 16 | Longer proper description of function... 17 | } 18 | \author{ 19 | Caitlin Collins \email{caitiecollins@gmail.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/keepFirstN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{keepFirstN} 4 | \alias{keepFirstN} 5 | \title{Truncate to keep only the \emph{first} N characters.} 6 | \usage{ 7 | keepFirstN(x, n) 8 | } 9 | \arguments{ 10 | \item{x}{A vector whose element(s) will be truncated.} 11 | 12 | \item{n}{An integer specifying the number of characters to \emph{keep}.} 13 | } 14 | \description{ 15 | Truncate an element, or each element of a vector, by 16 | removing all but the first N characters of each element. 
17 | } 18 | \author{ 19 | Caitlin Collins \email{caitiecollins@gmail.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/keepLastN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{keepLastN} 4 | \alias{keepLastN} 5 | \title{Truncate to keep only the \emph{last} N characters.} 6 | \usage{ 7 | keepLastN(x, n) 8 | } 9 | \arguments{ 10 | \item{x}{A vector whose element(s) will be truncated.} 11 | 12 | \item{n}{An integer specifying the number of characters to \emph{keep}.} 13 | } 14 | \description{ 15 | Truncate an element, or each element of a vector, by 16 | removing all but the last N characters of each element. 17 | } 18 | \author{ 19 | Caitlin Collins \email{caitiecollins@gmail.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/manhattan.plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.sig.snps.R 3 | \name{manhattan.plot} 4 | \alias{manhattan.plot} 5 | \title{Manhattan Plot} 6 | \usage{ 7 | manhattan.plot( 8 | p.vals, 9 | x = c(1:length(p.vals)), 10 | col = "funky", 11 | transp = 0.25, 12 | sig.thresh = NULL, 13 | thresh.col = "red", 14 | snps.assoc = NULL, 15 | snps.assoc.col = "red", 16 | jitter.amount = 1e-05, 17 | min.p = NULL, 18 | log10 = FALSE, 19 | ylab = NULL, 20 | main.title = "Manhattan plot" 21 | ) 22 | } 23 | \arguments{ 24 | \item{p.vals}{A numeric vector containing p-values or association score values for each genetic locus.} 25 | } 26 | \description{ 27 | Generate a Manhattan plot showing the association score values or p-values (y-axis) 28 | for each locus (x-axis) tested by an association test. 
29 | } 30 | \author{ 31 | Caitlin Collins \email{caitiecollins@gmail.com} 32 | } 33 | -------------------------------------------------------------------------------- /man/memfree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{memfree} 4 | \alias{memfree} 5 | \title{Get the current amount of available memory.} 6 | \usage{ 7 | memfree(OS = NULL) 8 | } 9 | \arguments{ 10 | \item{OS}{A character string indicating the operating system of the machine in question. 11 | Can be one of "Windows", "Mac" (or "Darwin"), or "Linux". If OS is NULL (the default), 12 | OS will be set to Sys.info()["sysname"].} 13 | } 14 | \description{ 15 | Function to determine how much memory (in GB) is currently available for use 16 | on your PC. 17 | } 18 | \author{ 19 | Caitlin Collins \email{caitiecollins@gmail.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/pair.tests.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pair.tests.R 3 | \name{pair.tests} 4 | \alias{pair.tests} 5 | \title{Pairwise tests for categorical phenotypes} 6 | \usage{ 7 | pair.tests(x, y, z, method = "bonf", digits = 3) 8 | } 9 | \arguments{ 10 | \item{x}{A contingency table (snps[,i] x phen) for score 1 (\code{terminal.test} 11 | with \code{correct.prop = TRUE}, \code{categorical = TRUE}).} 12 | 13 | \item{y}{A vector of values containing pairwise score 2 (\code{simultaneous.test} 14 | with \code{categorical = TRUE}) results for snps[,i].} 15 | 16 | \item{z}{A contingency table (snps.rec[,i] x phen.rec) for score 3 (\code{subsequent.test} 17 | with \code{correct.prop = TRUE}, \code{categorical = TRUE}).} 18 | } 19 | \description{ 20 | Internal function to calculate treeWAS 21 | terminal, simultaneous, 
subsequent tests, 22 | and chi-squared p-values for a given snp across pairs of 23 | phenotype levels. 24 | } 25 | \examples{ 26 | ## Example ## 27 | \dontrun{ 28 | ## basic use of fn 29 | out <- pair.tests(x, y, z) 30 | } 31 | 32 | } 33 | \author{ 34 | Caitlin Collins \email{caitiecollins@gmail.com} 35 | } 36 | -------------------------------------------------------------------------------- /man/phen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{phen} 5 | \alias{phen} 6 | \title{A binary phenotype.} 7 | \format{ 8 | A named vector of length 100. 9 | } 10 | \usage{ 11 | data(phen) 12 | } 13 | \description{ 14 | This vector specifies the phenotype of each individual. 15 | In this case, the phenotype is a binary variable. 16 | The phenotypic vector is encoded as a factor 17 | with two possible phenotypic states, "A" and "B", 18 | which may be represented by the numeric values 1 and 2 (as in \code{str(phen)}). 19 | } 20 | \details{ 21 | Each individual in the sample is represented by a unique identifier (name) 22 | which corresponds to the name of one element of the phenotypic vector. 23 | Each element of the phenotypic vector gives the phenotypic value of the named individual. 24 | } 25 | \author{ 26 | Caitlin Collins \email{caitiecollins@gmail.com} 27 | } 28 | \keyword{data} 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/phen.cont.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{phen.cont} 5 | \alias{phen.cont} 6 | \title{A continuous phenotype.} 7 | \format{ 8 | A named vector of length 533. 
9 | } 10 | \usage{ 11 | data(phen.cont) 12 | } 13 | \description{ 14 | This vector specifies the phenotype of each individual. 15 | In this case, the phenotype is a continuous numeric value. 16 | } 17 | \details{ 18 | Each individual in the sample is represented by a unique identifier (name) 19 | which corresponds to the name of one element of the phenotypic vector. 20 | Each element of the phenotypic vector gives the phenotypic value of the named individual. 21 | 22 | Note that, due to some skew in the distribution of this continuous variable, 23 | it may be useful to transform the phenotype by rank prior to analysis by treeWAS, 24 | as in \code{data(phen.cont.rank)} (see the treeWAS vignette). 25 | % (see \code{vignette("treeWAS")}). 26 | } 27 | \author{ 28 | Caitlin Collins \email{caitiecollins@gmail.com} 29 | } 30 | \keyword{data} 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/phen.cont.rank.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{phen.cont.rank} 5 | \alias{phen.cont.rank} 6 | \title{A rank-transformed continuous phenotype.} 7 | \format{ 8 | A named vector of length 533. 9 | } 10 | \usage{ 11 | data(phen.cont.rank) 12 | } 13 | \description{ 14 | This vector specifies the phenotype of each individual. 15 | In this case, the phenotype is a rank, that has been derived by 16 | rank-ordering the elements of the original continuous phenotype (\code{data(phen.cont)}) 17 | from lowest to highest. 18 | Transforming by rank prior to analysis by treeWAS can be useful 19 | for continuous phenotypic variables that are highly skewed or contain significant outliers 20 | (see the treeWAS vignette). 21 | % (see \code{vignette("treeWAS")}). 
22 | } 23 | \details{ 24 | Each individual in the sample is represented by a unique identifier (name) 25 | which corresponds to the name of one element of the phenotypic vector. 26 | Each element of the phenotypic vector gives the phenotypic value of the named individual. 27 | } 28 | \author{ 29 | Caitlin Collins \email{caitiecollins@gmail.com} 30 | } 31 | \keyword{data} 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /man/phen.plot.col.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{phen.plot.col} 5 | \alias{phen.plot.col} 6 | \title{Phenotypic tree-colouring schemes.} 7 | \format{ 8 | A list of length 5. 9 | } 10 | \usage{ 11 | data(phen.plot.col) 12 | } 13 | \description{ 14 | A list containing the colour values that \code{plot_phen} generates to represent 15 | the states and substitutions of the phenotypic variable (\code{data(phen)}) 16 | along the phylogenetic tree (\code{data(tree)}), with \code{plot_phen(tree, phen.nodes=phen)}. 17 | You are unlikely to have to interact with this list, 18 | as the colours are automatically plotted by the \code{plot_phen} function. 19 | } 20 | \details{ 21 | The five elements of this list give the colour schemes used to indicate the phenotypic state at: 22 | edge.labels, edges, all.nodes, internal.nodes, and tip.labels. 
23 | } 24 | \author{ 25 | Caitlin Collins \email{caitiecollins@gmail.com} 26 | } 27 | \keyword{data} 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/phen.reconstruction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{phen.reconstruction} 5 | \alias{phen.reconstruction} 6 | \title{The ancestral state reconstruction of a binary phenotype.} 7 | \format{ 8 | A named vector of length 199. 9 | } 10 | \usage{ 11 | data(phen.reconstruction) 12 | } 13 | \description{ 14 | This vector contains the terminal and ancestral states of a binary phenotypic variable (\code{data(phen)}). 15 | The observed phenotypic states of sampled individuals 16 | (i.e., those represented at the terminal nodes of a phylogenetic tree) 17 | are presented first, in elements 1:N (here 1:100). 18 | The unobserved ancestral states of the phenotype at internal nodes have been 19 | inferred via ancestral state reconstruction, using \code{asr(phen, tree)}. 20 | } 21 | \details{ 22 | Like the original phenotypic vector (\code{data(phen)}), 23 | \code{phen.reconstruction} is a binary variable that is encoded as a factor 24 | with two possible phenotypic states, "A" and "B", 25 | which may be represented by the numeric values 1 and 2 (as in \code{str(phen.reconstruction)}). 26 | 27 | Each individual in the sample is represented by a unique identifier (name) 28 | which corresponds to the name of one element of the phenotypic vector. 29 | (Internal node names have been generated during ancestral state reconstruction.) 30 | Each element of the phenotypic vector gives the phenotypic value of the named individual. 
31 | } 32 | \author{ 33 | Caitlin Collins \email{caitiecollins@gmail.com} 34 | } 35 | \keyword{data} 36 | \keyword{datasets} 37 | -------------------------------------------------------------------------------- /man/phen.sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/phen.sim.R 3 | \name{phen.sim} 4 | \alias{phen.sim} 5 | \title{Short one-phrase description.} 6 | \usage{ 7 | phen.sim(tree, n.subs = 15, grp.min = 0.2, n.subs.var = TRUE, seed = NULL) 8 | } 9 | \arguments{ 10 | \item{tree}{A phylo object.} 11 | 12 | \item{n.subs}{An integer controlling the phenotypic substitution rate (see details).} 13 | 14 | \item{grp.min}{An optional numeric value < 0.5 specifying the minimum accepted proportion of terminal nodes 15 | to be in the minor phenotypic group. It may be useful to specify a \code{grp.min} of, 16 | for example, 0.2 (the default) to prevent excessive imbalance in the phenotypic group sizes. However, 17 | it is important to note that (at least for the time being) \code{grp.min} values closer to 18 | 0.5 are likely to cause the computational time of \code{phen.sim} to increase substantially, 19 | as the function will run until acceptable group sizes are randomly generated.} 20 | 21 | \item{seed}{An optional integer used to set the seed and control the pseudo-random process used in 22 | \code{phen.sim}, enabling the repeatable regeneration of identical output.} 23 | } 24 | \description{ 25 | The parameter n.subs controls the simulation of the phenotype by specifying 26 | the expected value of the number of phenotypic substitutions to occur on the tree provided. 27 | The true number of phenotypic substitutions is drawn from a Poisson distribution with parameter n.subs. 28 | } 29 | \details{ 30 | Longer proper description of function... 
31 | } 32 | \examples{ 33 | 34 | ## basic use of fn 35 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1) 36 | 37 | ## plot output 38 | plot(tree) 39 | 40 | } 41 | \author{ 42 | Caitlin Collins \email{caitiecollins@gmail.com} 43 | } 44 | -------------------------------------------------------------------------------- /man/plot_phen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.phen.R 3 | \name{plot_phen} 4 | \alias{plot_phen} 5 | \title{Plot the states of a phenotype or genotype along a phylogenetic tree.} 6 | \usage{ 7 | plot_phen( 8 | tree, 9 | phen.nodes, 10 | snp.nodes = NULL, 11 | plot = TRUE, 12 | RTL = FALSE, 13 | LTR.snp = FALSE, 14 | main.title = NULL, 15 | align.tip.label = FALSE, 16 | show.axis = TRUE, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{tree}{A phylo object.} 22 | 23 | \item{phen.nodes}{A vector containing the phenotypic state of either 24 | (i) only terminal nodes in tree or 25 | (ii) all nodes, terminal and internal in tree.} 26 | 27 | \item{snp.nodes}{An optional vector containing the states of 28 | a second variable (e.g., a genotypic variable) for either 29 | the terminal nodes or all nodes in the tree.} 30 | 31 | \item{plot}{A logical specifying whether to display a plot 32 | of the inputted phylogenetic tree with edges coloured to show the 33 | simulated phenotypic substitution process.} 34 | 35 | \item{RTL}{A logical variable indicating whether to plot the 36 | first or only tree from right to left (TRUE), 37 | or left to right (FALSE, the default).} 38 | 39 | \item{LTR.snp}{A logical variable indicating whether to plot the 40 | optional second tree from left to right (TRUE), 41 | or right to left (FALSE, the default).} 42 | 43 | \item{main.title}{Either NULL or a character vector specifying a main title for the plot.} 44 | 45 | \item{align.tip.label}{A logical indicating whether to align tip labels 
with each other (TRUE) or 46 | to place tip labels at terminal nodes (FALSE, the default).} 47 | 48 | \item{show.axis}{A logical indicating whether to add an axis showing the scale of branch lengths 49 | at the foot of the plot with \code{axisPhylo} (TRUE, the default) or not (FALSE).} 50 | } 51 | \description{ 52 | This function is designed to visualise the reconstructed ancestral states of a variable along a phylogenetic tree. 53 | It uses colour to represent the states of terminal and internal nodes (if available), 54 | indicating changes between states by grey branches (except in the case of truly continuous variables). 55 | } 56 | \details{ 57 | Ancestral states must be inferred in advance, for example, using function \code{asr}. 58 | States are then shown in the colour of terminal node labels and the colour of the edges of the tree. 59 | If only terminal states are available, these can be plotted along the tips of the tree. 60 | If desired, a second variable, for example, a particular SNP or genetic locus, can be shown along 61 | a second phylogeny. In this case, the second variable will be shown on a topologically identical tree, 62 | which will be plotted from right to left, mirroring the first tree along the vertical axis of the plotting window. 63 | The \code{RTL} and \code{LTR.snp} arguments can be used to change the 64 | orientation/direction of the first and/or second tree. 
65 | } 66 | \examples{ 67 | 68 | ## Example 1 ## 69 | \dontrun{ 70 | ## load phylogenetic and phenotypic data: 71 | data(tree) 72 | data(phen) 73 | 74 | ## reconstruct phenotypic ancestral states: 75 | phen.rec <- asr(var=phen, tree=tree, type="parsimony", method="discrete") 76 | 77 | ## plot phenotype along tree: 78 | plot_phen(tree, phen.nodes=phen.rec) 79 | } 80 | 81 | 82 | ## Example 2 ## 83 | \dontrun{ 84 | ## load phylogenetic and phenotypic data: 85 | data(tree) 86 | data(phen) 87 | 88 | ## load genotypic data: 89 | data(snps) 90 | 91 | ## reconstruct phenotypic ancestral states: 92 | phen.rec <- asr(var=phen, tree=tree, type="parsimony", method="discrete") 93 | 94 | ## reconstruct genotypic ancestral states: 95 | snps.rec <- asr(var=snps, tree=tree, type="parsimony", method="discrete") 96 | 97 | ## plot both the phenotype and a genotype along tree: 98 | plot_phen(tree, phen.nodes=phen.rec, snp.nodes=snps.rec[,1]) 99 | } 100 | 101 | } 102 | \author{ 103 | Caitlin Collins \email{caitiecollins@gmail.com} 104 | } 105 | -------------------------------------------------------------------------------- /man/plot_prob_phen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fwd.plot.prob.phen.R 3 | \name{plot_prob_phen} 4 | \alias{plot_prob_phen} 5 | \title{Plot the probability of association, given \code{p} and \code{n.snps.assoc}.} 6 | \usage{ 7 | plot_prob_phen(p = 0.5, n.snps.assoc = 10) 8 | } 9 | \arguments{ 10 | \item{p}{A numeric value indicating the probability of substitution, at each site, along the tree.} 11 | 12 | \item{n.snps.assoc}{An integer specifying the number of genetic loci that are associated with the phenotype.} 13 | } 14 | \description{ 15 | [*For use with the 'fwd.-.sim' functions:*] 16 | Plot the cumulative probability of association (Pr(phen=1)), with a given value of \code{p}, 17 | as the number of associated sites 
(SNPi=1) increases from i=0 to i=\code{n.snps.assoc}. 18 | } 19 | \examples{ 20 | \dontrun{ 21 | ## basic use of fn ## 22 | ## compare probability of having phenotype with 10 SNPs at varying p: 23 | plot_prob_phen(p=0.8, n.snps.assoc=10) 24 | plot_prob_phen(p=0.5, n.snps.assoc=10) 25 | plot_prob_phen(p=0.2, n.snps.assoc=10) 26 | } 27 | } 28 | \author{ 29 | Caitlin Collins \email{caitiecollins@gmail.com} 30 | } 31 | -------------------------------------------------------------------------------- /man/plot_sig_snps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.sig.snps.R 3 | \name{plot_sig_snps} 4 | \alias{plot_sig_snps} 5 | \title{Plot null distribution and significant sites.} 6 | \usage{ 7 | plot_sig_snps( 8 | corr.dat, 9 | corr.sim, 10 | corr.sim.subset = NULL, 11 | sig.corrs = NULL, 12 | sig.snps = NULL, 13 | sig.thresh = NULL, 14 | test = NULL, 15 | sig.snps.col = "blue", 16 | hist.col = rgb(0, 0, 1, 0.5), 17 | hist.subset.col = rgb(1, 0, 0, 0.5), 18 | thresh.col = "seasun", 19 | snps.assoc = NULL, 20 | snps.assoc.col = "red", 21 | bg = "lightgray", 22 | grid = TRUE, 23 | freq = FALSE, 24 | plot.null.dist = TRUE, 25 | plot.dist = FALSE, 26 | main.title = TRUE, 27 | ... 28 | ) 29 | } 30 | \arguments{ 31 | \item{arg}{Description.} 32 | } 33 | \description{ 34 | Plot a histogram of the null distribution, 35 | indicating the significance threshold and 36 | the names and association scores of significant sites. 
37 | } 38 | \author{ 39 | Caitlin Collins \email{caitiecollins@gmail.com} 40 | } 41 | -------------------------------------------------------------------------------- /man/print.treeWAS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/treeWAS.R 3 | \name{print.treeWAS} 4 | \alias{print.treeWAS} 5 | \title{Print \code{treeWAS} output.} 6 | \usage{ 7 | \method{print}{treeWAS}(x, sort.by.p = FALSE, digits = 3) 8 | } 9 | \arguments{ 10 | \item{x}{The output returned by \code{treeWAS}.} 11 | 12 | \item{sort.by.p}{A logical indicating whether to sort the results by decreasing p-value (\code{TRUE}) 13 | or by locus (\code{FALSE}, the default).} 14 | } 15 | \description{ 16 | Print the results of \code{treeWAS}, excluding longer data elements within the output. 17 | } 18 | \author{ 19 | Caitlin Collins \email{caitiecollins@gmail.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/read.CFML.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readCFML.R 3 | \name{read.CFML} 4 | \alias{read.CFML} 5 | \title{Convert ClonalFrameML output.} 6 | \usage{ 7 | read.CFML(prefix, tree = NULL, plot = TRUE, suff.length = 2) 8 | } 9 | \arguments{ 10 | \item{prefix}{A character string containing the prefix of all file names to be read in.} 11 | } 12 | \value{ 13 | read.CFML returns a list containing: 14 | (i) \code{tree}: The phylogenetic tree. 15 | (ii) \code{snps}: The binary genetic data matrix of polymorphic loci. 16 | (iii) \code{snps.rec}: The genetic data reconstruction matrix. 17 | (iv) \code{seqs}: The genetic data sequences (polymorphic loci only), a \code{DNAbin} object. 
18 | (v) \code{index}: The index vector, indicating for each column in \code{seqs} 19 | the unique polymorphic column pattern to which it corresponds (0 = non-polymorphic). 20 | (vi) \code{n.subs}: The distribution of the number of substitutions per site. 21 | Note that all genetic data elements (ii - iv) are returned in expanded form; that is, 22 | they contain both unique and duplicate column patterns for all polymorphic loci as indicated in the \code{index} vector. 23 | } 24 | \description{ 25 | Convert the output of ClonalFrameML into a form usable within \code{treeWAS}. 26 | } 27 | \details{ 28 | The \code{prefix} must be the prefix to three files ending in: 29 | (i) "labelled_tree.newick", (ii) "ML_sequence.fasta", (iii) "position_cross_reference.txt". 30 | } 31 | \examples{ 32 | ## Example ## 33 | \dontrun{ 34 | ## basic use of fn 35 | out <- read.CFML(prefix="./filename_") 36 | } 37 | 38 | } 39 | \author{ 40 | Caitlin Collins \email{caitiecollins@gmail.com} 41 | } 42 | -------------------------------------------------------------------------------- /man/removeFirstN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{removeFirstN} 4 | \alias{removeFirstN} 5 | \title{Truncate to remove all of the \emph{first} N characters.} 6 | \usage{ 7 | removeFirstN(x, n) 8 | } 9 | \arguments{ 10 | \item{x}{A vector whose element(s) will be truncated.} 11 | 12 | \item{n}{An integer specifying the number of characters to \emph{remove}.} 13 | } 14 | \description{ 15 | Truncate an element, or each element of a vector, by 16 | removing the first N characters of each element. 
17 | } 18 | \author{ 19 | Caitlin Collins \email{caitiecollins@gmail.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/removeLastN.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{removeLastN} 4 | \alias{removeLastN} 5 | \title{Truncate to remove all of the \emph{last} N characters.} 6 | \usage{ 7 | removeLastN(x, n) 8 | } 9 | \arguments{ 10 | \item{x}{A vector whose element(s) will be truncated.} 11 | 12 | \item{n}{An integer specifying the number of characters to \emph{remove}.} 13 | } 14 | \description{ 15 | Truncate an element, or each element of a vector, by 16 | removing the last N characters of each element. 17 | } 18 | \author{ 19 | Caitlin Collins \email{caitiecollins@gmail.com} 20 | } 21 | -------------------------------------------------------------------------------- /man/selectBiallelicSNP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{selectBiallelicSNP} 4 | \alias{selectBiallelicSNP} 5 | \title{Short one-phrase description.} 6 | \usage{ 7 | selectBiallelicSNP(x, DNA = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{A character vector of length 1 containing a nucleotide to be converted.} 11 | 12 | \item{DNA}{logical; if TRUE (default), uses DNA bases (ACGT), if FALSE, uses RNA bases (ACGU).} 13 | } 14 | \description{ 15 | Longer proper description of function... 
16 | } 17 | \author{ 18 | Caitlin Collins \email{caitiecollins@gmail.com} 19 | } 20 | -------------------------------------------------------------------------------- /man/set.args.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{set.args} 4 | \alias{set.args} 5 | \title{Set a list of arguments.} 6 | \usage{ 7 | set.args(args, envir = sys.frame(which = 0L)) 8 | } 9 | \arguments{ 10 | \item{args}{A named list of arguments.} 11 | 12 | \item{envir}{The environment in which these arguments will be set.} 13 | } 14 | \description{ 15 | Function to set a list of arguments without having to remove commas. 16 | Useful for troubleshooting. For example, if attempting to run a function 17 | (particularly one with many arguments) line by line, 18 | \code{set.args} can be used to set a list of arguments in one go, by copying a 19 | comma-separated set of arguments from an existing function call or a new call to \code{args(fn)}. 20 | } 21 | \details{ 22 | Please note that unless the \code{envir} argument is changed from its default (\code{sys.frame}), 23 | any arguments set with \code{set.args} will \emph{over-ride} any values currently assigned to those names. 
24 | } 25 | \author{ 26 | Caitlin Collins \email{caitiecollins@gmail.com} 27 | } 28 | -------------------------------------------------------------------------------- /man/simTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simTest.R 3 | \name{simTest} 4 | \alias{simTest} 5 | \title{Simulation Testing.} 6 | \usage{ 7 | simTest( 8 | set.number = 3, 9 | n.reps = 1, 10 | set.seed.as = "file.number", 11 | working.dir = "~/", 12 | from.file = FALSE, 13 | file.n = NULL, 14 | Windows = FALSE, 15 | cluster = FALSE, 16 | n.ind = 100, 17 | n.snps = 10000, 18 | n.subs = treeWAS::dist_0.01, 19 | n.phen.subs = 15, 20 | n.snps.assoc = 10, 21 | assoc.prob = 90, 22 | grp.min = 0.25, 23 | s = 20, 24 | af = 10, 25 | coaltree = TRUE, 26 | p.value = 0.01, 27 | p.value.correct = "bonf", 28 | p.value.by = "count", 29 | sim.n.snps = 1e+05, 30 | treeWAS.test = c("terminal", "simultaneous", "subsequent"), 31 | snps.reconstruction = "parsimony", 32 | phen.reconstruction = "parsimony" 33 | ) 34 | } 35 | \arguments{ 36 | \item{test}{A character string or vector containing one or more of the following available tests of association: 37 | "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the first three tests are run. 38 | See details for more information on what these tests do and when they may be appropriate.} 39 | } 40 | \description{ 41 | Generic simulation-testing function used to validate treeWAS performance on simulated datasets. Not designed for public use! 
42 | } 43 | \author{ 44 | Caitlin Collins \email{caitiecollins@gmail.com} 45 | } 46 | -------------------------------------------------------------------------------- /man/simultaneous.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simultaneous.test.R 3 | \name{simultaneous.test} 4 | \alias{simultaneous.test} 5 | \title{Simultaneous test} 6 | \usage{ 7 | simultaneous.test( 8 | snps.reconstruction, 9 | phen.reconstruction, 10 | tree, 11 | categorical = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{snps.reconstruction}{A matrix containing the terminal and reconstructed 16 | ancestral states of SNPs for all nodes in the tree.} 17 | 18 | \item{phen.reconstruction}{A vector containing the terminal and reconstructed 19 | ancestral states of the phenotype for all nodes in the tree.} 20 | 21 | \item{tree}{A phylo object containing the tree representing the ancestral relationships 22 | between the individuals for which snps and phen are known.} 23 | } 24 | \description{ 25 | Calculates treeWAS score 2, the simultaneous test, as the number of 26 | substitutions or changes in genotype (\code{snps.reconstruction}) and phenotype 27 | (\code{phen.reconstruction}) that occur simultaneously on the same branches of the tree. 
28 | } 29 | \author{ 30 | Caitlin Collins \email{caitiecollins@gmail.com} 31 | } 32 | -------------------------------------------------------------------------------- /man/simultaneous.test.epi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simultaneous.test.epi.R 3 | \name{simultaneous.test.epi} 4 | \alias{simultaneous.test.epi} 5 | \title{Test for association between genetic loci with Score 2.} 6 | \usage{ 7 | simultaneous.test.epi(snps.reconstruction, tree, snps.subset = NULL) 8 | } 9 | \arguments{ 10 | \item{snps.reconstruction}{A matrix containing the terminal and reconstructed 11 | ancestral states of SNPs for all nodes in the tree.} 12 | 13 | \item{tree}{A phylo object containing the tree representing the ancestral relationships 14 | between the individuals for which snps and phen are known.} 15 | 16 | \item{snps.subset}{An optional vector (see details); else, NULL. 17 | The snps.subset vector can be a character vector, containing a subset of colnames(snps.reconstruction), 18 | a logical vector, using TRUE or FALSE to indicate which columns are to be retained and excluded, 19 | or an integer vector, specifying the column indices to be retained.} 20 | } 21 | \description{ 22 | [*\emph{A work in progress; not currently integrated into treeWAS:}*] 23 | Use the simultaneous.test (Score 2) to test for associations between genetic loci, 24 | which may indicate an epistatic interaction. 25 | This function can be used either to test 26 | for pairwise association between all pairs of genetic loci 27 | or for associations between a subset of snps and all other snps 28 | (recommended for large datasets; see details). 29 | } 30 | \details{ 31 | The number of pairwise tests between all pairs of snps 32 | grows rapidly as the number of snps columns increases. 
33 | As such, for datasets where ncol(snps.reconstruction) is large, we recommend that 34 | the snps.subset argument is used to reduce the number of tests, by 35 | indicating which snps to test for association with all other snps. 36 | The snps.subset index can be used to select any subset of snps of interest. 37 | For example, one may wish to test for interactions between all snps and a subset of snps that 38 | had been deemed significantly associated with a particular phenotype in a previous run of treeWAS. 39 | } 40 | \author{ 41 | Caitlin Collins \email{caitiecollins@gmail.com} 42 | } 43 | -------------------------------------------------------------------------------- /man/snp.sim.Q.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snp.sim.Q.R, R/snp.sim.Q_old.R 3 | \name{snp.sim.Q} 4 | \alias{snp.sim.Q} 5 | \title{Alternative SNPs simulation fn.} 6 | \usage{ 7 | snp.sim.Q( 8 | n.snps = 10000, 9 | n.subs = 1, 10 | snp.root = NULL, 11 | n.snps.assoc = 10, 12 | assoc.prob = 100, 13 | Q = matrix(c(2, 0.75, 0.75, 1, 3, 0.5, 0.25, 3, 3, 0.25, 0.5, 3, 1, 0.75, 0.75, 2), 14 | nrow = 4, byrow = T, dimnames = rep(list(c("0|0", "0|1", "1|0", "1|1")), 2)), 15 | tree = coalescent.tree.sim(100), 16 | n.phen.subs = 15, 17 | phen.loci = NULL, 18 | heatmap = FALSE, 19 | reconstruct = FALSE, 20 | dist.dna.model = "JC69", 21 | grp.min = 0.25, 22 | row.names = NULL, 23 | set = 3, 24 | seed = 1 25 | ) 26 | 27 | snp.sim.Q( 28 | n.snps = 10000, 29 | n.subs = 1, 30 | snp.root = NULL, 31 | n.snps.assoc = 10, 32 | assoc.prob = 100, 33 | Q = matrix(c(2, 0.75, 0.75, 1, 3, 0.5, 0.25, 3, 3, 0.25, 0.5, 3, 1, 0.75, 0.75, 2), 34 | nrow = 4, byrow = T, dimnames = rep(list(c("0|0", "0|1", "1|0", "1|1")), 2)), 35 | tree = coalescent.tree.sim(100), 36 | n.phen.subs = 15, 37 | phen.loci = NULL, 38 | heatmap = FALSE, 39 | reconstruct = FALSE, 40 | dist.dna.model = "JC69", 41 | 
grp.min = 0.25, 42 | row.names = NULL, 43 | set = 3, 44 | seed = 1 45 | ) 46 | } 47 | \arguments{ 48 | \item{n.snps}{An integer specifying the number of snps columns to be simulated.} 49 | 50 | \item{tree}{A \code{phylo} object containing the phylogenetic tree; or, a character string, 51 | one of \code{"NJ"}, \code{"BIONJ"} (the default), or \code{"parsimony"}; 52 | or, if NAs are present in the distance matrix, one of: \code{"NJ*"} or \code{"BIONJ*"}, 53 | specifying the method of phylogenetic reconstruction.} 54 | 55 | \item{heatmap}{A logical indicating whether to produce a heatmap of the genetic distance 56 | between the simulated genomes of the n.ind individuals.} 57 | 58 | \item{reconstruct}{Either a logical indicating whether to attempt to reconstruct 59 | a phylogenetic tree using the simulated genetic data, or one of c("UPGMA", "nj", "ml") 60 | to specify that tree reconstruction is desired by one of these three methods 61 | (Unweighted Pair Group Method with Arithmetic Mean, Neighbour-Joining, Maximum-Likelihood).} 62 | 63 | \item{dist.dna.model}{A character string specifying the type of model to use in reconstructing the phylogenetic tree for 64 | calculating the genetic distance between individual genomes, only used if \code{tree} is 65 | a character string (see ?dist.dna).} 66 | 67 | \item{grp.min}{(Not yet (re-)implemented in this function.) 
68 | An optional number between 0.1 and 0.9 to control the proportional size of the smaller phenotypic group.} 69 | 70 | \item{row.names}{An optional vector containing row names for the individuals to be simulated.} 71 | 72 | \item{seed}{An optional integer to control the pseudo-randomisation process and allow for identical repeat runs of the function; 73 | else \code{NULL}.} 74 | 75 | \item{phen.reconstruction}{Either a character string specifying \code{"parsimony"} (the default) or \code{"ML"} (maximum likelihood) 76 | for the ancestral state reconstruction of the phenotypic variable, 77 | or a vector containing this reconstruction if it has been performed elsewhere.} 78 | 79 | \item{s}{If \code{set} is 3, the \code{s} parameter controls a baseline number of substitutions to be 80 | experienced by the phenotype and associated loci: by default, 20.} 81 | 82 | \item{af}{If \code{set} is 3, the \code{af} parameter provides an association factor, 83 | controlling the preference for association over non-association at associated loci: by default, 10 (for a 10x preference).} 84 | 85 | \item{plot}{A logical indicating whether to generate a plot of the phylogenetic tree (\code{TRUE}) or not (\code{FALSE}, the default).} 86 | } 87 | \description{ 88 | Currently under development. Please use the regular snp.sim function to simulate genetic data. 89 | 90 | NOT currently in use. Please use the regular snp.sim function to simulate genetic data. 
91 | } 92 | \examples{ 93 | ## Example ## 94 | 95 | ## Example ## 96 | 97 | } 98 | \author{ 99 | Caitlin Collins \email{caitiecollins@gmail.com} 100 | } 101 | -------------------------------------------------------------------------------- /man/snp.sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snp.sim.R 3 | \name{snp.sim} 4 | \alias{snp.sim} 5 | \title{Short one-phrase description.} 6 | \usage{ 7 | snp.sim( 8 | n.snps = 10000, 9 | n.subs = 1, 10 | snp.root = NULL, 11 | n.snps.assoc = 0, 12 | assoc.prob = 100, 13 | tree = coalescent.tree.sim(100), 14 | phen.loci = NULL, 15 | heatmap = FALSE, 16 | reconstruct = FALSE, 17 | dist.dna.model = "JC69", 18 | row.names = NULL, 19 | set = NULL, 20 | seed = 1 21 | ) 22 | } 23 | \arguments{ 24 | \item{n.snps}{An integer specifying the number of genetic loci to be simulated.} 25 | } 26 | \description{ 27 | Longer proper description of function... 28 | } 29 | \examples{ 30 | ## Example ## 31 | 32 | } 33 | \author{ 34 | Caitlin Collins \email{caitiecollins@gmail.com} 35 | } 36 | -------------------------------------------------------------------------------- /man/snps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{snps} 5 | \alias{snps} 6 | \title{A genetic data matrix.} 7 | \format{ 8 | A binary matrix with 100 rows and 20,003 columns. 9 | } 10 | \usage{ 11 | data(snps) 12 | } 13 | \description{ 14 | This binary matrix contains the allelic states of genetic variables, 15 | typically single-nucleotide polymorphisms (SNPs) (or the presence/absence states of accessory genes), 16 | showing individuals in the rows and genetic loci in the columns. 
17 | } 18 | \details{ 19 | Each individual in the sample is represented by a unique identifier (name) 20 | which corresponds to the name of one row of the snps matrix. 21 | Each genetic locus is also required to have a unique name. 22 | 23 | In this \code{snps} matrix, redundant columns are present for biallelic loci, 24 | denoting the state of the second allele as the inverse of the previous column 25 | (e.g., compare locus 1.g and locus 1.a). 26 | These biallelic sites can be condensed into a more efficient binary form 27 | by using \code{get.binary.snps(snps)} 28 | (see the treeWAS vignette). 29 | % (see vignette("treeWAS")). 30 | } 31 | \author{ 32 | Caitlin Collins \email{caitiecollins@gmail.com} 33 | } 34 | \keyword{data} 35 | \keyword{datasets} 36 | -------------------------------------------------------------------------------- /man/snps.assoc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{snps.assoc} 5 | \alias{snps.assoc} 6 | \title{The phenotypically-associated sites in the \code{snps} matrix.} 7 | \format{ 8 | A vector of length 10. 9 | } 10 | \usage{ 11 | data(snps.assoc) 12 | } 13 | \description{ 14 | This vector specifies the identities (names) of the loci in the genetic data matrix (see: \code{data(snps)}) 15 | that have been simulated along the phylogenetic tree (see: \code{data(tree)}) 16 | to be in statistical association with the phenotype (see: \code{data(phen)}). 17 | Comparing this vector of snps column names to the set of snps loci identified by treeWAS 18 | allows us to evaluate the performance of the treeWAS GWAS method. 
19 | After applying treeWAS to the components of this dataset, 20 | using: \code{treeWAS(snps, phen, tree)}, 21 | we can assess the ability of treeWAS to recover these "known" associated sites 22 | via any of its three association scores 23 | (see the treeWAS vignette). 24 | % (see vignette("treeWAS")). 25 | } 26 | \author{ 27 | Caitlin Collins \email{caitiecollins@gmail.com} 28 | } 29 | \keyword{data} 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /man/snps.reconstruction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{snps.reconstruction} 5 | \alias{snps.reconstruction} 6 | \title{The ancestral state reconstruction of a genetic data matrix.} 7 | \format{ 8 | A binary matrix with 199 rows and 20,003 columns. 9 | } 10 | \usage{ 11 | data(snps.reconstruction) 12 | } 13 | \description{ 14 | This binary matrix contains the terminal and ancestral allelic states of a set of genetic variables 15 | (for the original genetic data matrix, see: \code{data(snps)}), 16 | showing individuals in the rows and genetic loci in the columns. 17 | The observed genotypic states of sampled individuals 18 | (i.e., those represented at the terminal nodes of a phylogenetic tree) 19 | are presented first, in elements 1:N (here 1:100). 20 | These rows of the matrix are identical to the input \code{snps} matrix (see: \code{data(snps)}). 21 | The unobserved ancestral states of the genotype at internal nodes have been 22 | inferred via ancestral state reconstruction, using \code{asr(snps, tree)}. 23 | } 24 | \details{ 25 | Each individual in the sample is represented by a unique identifier (name) 26 | which corresponds to the name of one row of the snps matrix. 27 | (Internal node names have been generated during ancestral state reconstruction.) 
28 | Each genetic locus is also required to have a unique name. 29 | 30 | In this \code{snps.reconstruction} matrix, redundant columns are present for biallelic loci, 31 | denoting the state of the second allele as the inverse of the previous column 32 | (e.g., compare locus 1.g and locus 1.a). 33 | These biallelic sites can be condensed into a more efficient binary form 34 | by using \code{get.binary.snps(snps)} 35 | (see the treeWAS vignette). 36 | % (see vignette("treeWAS")). 37 | } 38 | \author{ 39 | Caitlin Collins \email{caitiecollins@gmail.com} 40 | } 41 | \keyword{data} 42 | \keyword{datasets} 43 | -------------------------------------------------------------------------------- /man/subsequent.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subsequent.test.R 3 | \name{subsequent.test} 4 | \alias{subsequent.test} 5 | \title{Subsequent test} 6 | \usage{ 7 | subsequent.test( 8 | snps.reconstruction, 9 | phen.reconstruction, 10 | tree, 11 | correct.prop = FALSE, 12 | categorical = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{tree}{A phylo object.} 17 | } 18 | \description{ 19 | Calculates treeWAS score 3, the subsequent test. 
20 | } 21 | \examples{ 22 | 23 | ## basic use of fn 24 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1) 25 | 26 | } 27 | \author{ 28 | Caitlin Collins \email{caitiecollins@gmail.com} 29 | } 30 | -------------------------------------------------------------------------------- /man/table.matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{table.matrix} 4 | \alias{table.matrix} 5 | \title{Cross-tabulate the rows or columns of a matrix.} 6 | \usage{ 7 | table.matrix(data, MARGIN = 1) 8 | } 9 | \arguments{ 10 | \item{data}{A matrix or data.frame, potentially containing 11 | non-unique patterns in its rows or columns.} 12 | 13 | \item{MARGIN}{A single integer specifying the array margin to be held fixed. 14 | (To get unique \emph{rows}, select \code{MARGIN} = 1; 15 | for unique \emph{columns}, select \code{MARGIN} = 2.)} 16 | } 17 | \value{ 18 | A list with the following elements: 19 | \itemize{ 20 | \item{\code{table} \item{A contingency table of the counts of the 21 | number of occurrences of each unique row in the matrix.}} 22 | \item{\code{index} \item{An index vector containing the indices (row numbers), 23 | in a matrix composed only of unique rows, 24 | to which each row in the original matrix maps.}} 25 | \item{\code{unique.data} \item{A new matrix 26 | containing only the unique rows of the input matrix.}} 27 | } 28 | } 29 | \description{ 30 | A version of the base \code{table} function designed for matrices. 31 | Taking a matrix as input, \code{table.matrix} returns a contingency table, 32 | index vector, and unique matrix. 33 | } 34 | \details{ 35 | To apply this function to the \emph{columns} of a matrix, simply 36 | transpose the matrix before executing the command, as in: 37 | \code{table.matrix(t(data))}. 
38 | } 39 | \examples{ 40 | \dontrun{ 41 | ## load example data: 42 | data("snps.ace") 43 | x <- snps.ace 44 | 45 | ## basic use of fn on rows of x: 46 | tab.out <- table.matrix(x) 47 | 48 | ## apply fn to columns of x: 49 | tab.out <- table.matrix(t(x)) 50 | } 51 | 52 | } 53 | \author{ 54 | Caitlin Collins \email{caitiecollins@gmail.com} 55 | } 56 | -------------------------------------------------------------------------------- /man/terminal.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/terminal.test.R 3 | \name{terminal.test} 4 | \alias{terminal.test} 5 | \title{Terminal test} 6 | \usage{ 7 | terminal.test(snps, phen, correct.prop = FALSE, categorical = FALSE) 8 | } 9 | \arguments{ 10 | \item{tree}{A phylo object.} 11 | } 12 | \description{ 13 | Calculates treeWAS score 1, the terminal test. 14 | } 15 | \examples{ 16 | ## Example ## 17 | \dontrun{ 18 | ## basic use of fn 19 | out <- terminal.test(snps, phen) 20 | } 21 | 22 | } 23 | \author{ 24 | Caitlin Collins \email{caitiecollins@gmail.com} 25 | } 26 | -------------------------------------------------------------------------------- /man/terminal.test.epi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/terminal.test.epi.R 3 | \name{terminal.test.epi} 4 | \alias{terminal.test.epi} 5 | \title{Test for epistasis between genetic loci with Score 1.} 6 | \usage{ 7 | terminal.test.epi(snps, snps.subset = NULL) 8 | } 9 | \arguments{ 10 | \item{snps}{A matrix containing the states of SNPs (in columns) for all individuals (in rows).} 11 | 12 | \item{snps.subset}{An optional vector (see details); else, NULL. 
13 | The snps.subset vector can be a character vector, containing a subset of colnames(snps), 14 | a logical vector, using TRUE or FALSE to indicate which columns are to be retained and excluded, 15 | or an integer vector, specifying the column indices to be retained.} 16 | } 17 | \description{ 18 | [*\emph{A work in progress; not currently integrated into treeWAS:}*] 19 | Use the terminal.test (Score 1) to test for associations between genetic loci, 20 | which may indicate an epistatic interaction. 21 | This function can be used either to test 22 | for pairwise association between all pairs of genetic loci 23 | or for associations between a subset of snps and all other snps 24 | (recommended for large datasets; see details). 25 | } 26 | \details{ 27 | The number of pairwise tests between all pairs of snps 28 | grows rapidly as the number of snps columns increases. 29 | As such, for datasets where ncol(snps) is large, we recommend that 30 | the snps.subset argument is used to reduce the number of tests, by 31 | indicating which snps to test for association with all other snps. 32 | The snps.subset index can be used to select any subset of snps of interest. 33 | For example, one may wish to test for interactions between all snps and a subset of snps that 34 | had been deemed significantly associated with a particular phenotype in a previous run of treeWAS. 35 | } 36 | \author{ 37 | Caitlin Collins \email{caitiecollins@gmail.com} 38 | } 39 | -------------------------------------------------------------------------------- /man/tree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{tree} 5 | \alias{tree} 6 | \title{A phylogenetic tree.} 7 | \format{ 8 | A phylo object with 100 terminal nodes and 99 internal nodes. 
9 | } 10 | \usage{ 11 | data(tree) 12 | } 13 | \description{ 14 | This phylogenetic tree is a phylo object (see \code{vignette("Trees", package="phangorn")}) 15 | connecting the individuals represented in 16 | the rows of the example genetic data matrix (see: \code{data(snps)}) 17 | and the elements of the example phenotypic vector (see: \code{data(phen)}). 18 | } 19 | \details{ 20 | In this case, the tree was generated via simulation and used to simulate the genotypic and phenotypic data. 21 | In a typical empirical analysis, however, a phylogenetic tree would represent the inferred 22 | ancestral relationships between individuals, and it would be estimated from the available genetic data. 23 | For example, such a phylogeny could be reconstructed using \code{tree.reconstruct(snps, method="NJ")}, 24 | or automatically generated within treeWAS, according to the \code{tree} argument, as in: 25 | \code{treeWAS(snps, phen, tree="NJ")} 26 | (see the treeWAS vignette). 27 | % (see vignette("treeWAS")). 28 | } 29 | \author{ 30 | Caitlin Collins \email{caitiecollins@gmail.com} 31 | } 32 | \keyword{data} 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/tree.reconstruct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tree.reconstruct.R 3 | \name{tree.reconstruct} 4 | \alias{tree.reconstruct} 5 | \title{Short one-phrase description.} 6 | \usage{ 7 | tree.reconstruct( 8 | dna, 9 | method = c("BIONJ", "NJ", "parsimony", "BIONJ*", "NJ*"), 10 | dist.dna.model = "JC69", 11 | plot = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{dna}{A matrix or DNAbin object containing genomes for (only) 16 | the terminal nodes of the tree to be reconstructed. 
17 | Individuals should be in the rows and loci in the columns; rows and columns should be labelled.} 18 | 19 | \item{method}{A character string specifying the method of phylogenetic reconstruction: 20 | one of \code{"NJ"}, \code{"BIONJ"} (the default), or \code{"parsimony"}; 21 | or, if NAs are present in the distance matrix, one of: \code{"NJ*"} or \code{"BIONJ*"}.} 22 | 23 | \item{dist.dna.model}{A character string specifying the type of model to use in 24 | calculating the genetic distance between individual genomes (see ?dist.dna).} 25 | 26 | \item{plot}{A logical specifying whether to plot the reconstructed phylogenetic tree.} 27 | } 28 | \description{ 29 | Reconstructs a phylogenetic tree from the genetic data in \code{dna}, using the reconstruction method specified by \code{method}, and optionally plots the reconstructed tree. 30 | } 31 | \author{ 32 | Caitlin Collins \email{caitiecollins@gmail.com} 33 | } 34 | -------------------------------------------------------------------------------- /man/treeWAS.example.out.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{treeWAS.example.out} 5 | \alias{treeWAS.example.out} 6 | \title{Example output of treeWAS.} 7 | \format{ 8 | A treeWAS class object, comprising a list of length 5. 9 | } 10 | \usage{ 11 | data(treeWAS.example.out) 12 | } 13 | \description{ 14 | This "treeWAS" class object is a list containing the output of a treeWAS analysis. 15 | This GWAS analysis was performed to identify associations between 16 | loci in the example genetic data matrix (see: \code{data(snps)}) 17 | and phenotypic states in the example phenotypic vector (see: \code{data(phen)}), 18 | along the phylogenetic tree (see: \code{data(tree)}). 19 | } 20 | \details{ 21 | This \code{treeWAS} output was returned by the function: 22 | \code{treeWAS(snps, phen, tree)}. 
23 | treeWAS output contains elements describing 24 | the significant associations identified by each of the 25 | three association scores applied to all genetic loci. 26 | Additional elements of the output return all data that was 27 | used in the GWAS analysis, including both data input to treeWAS 28 | and all relevant data generated by treeWAS. 29 | 30 | For a detailed description of the elements of this output, 31 | please scroll down to the "Value" section of the \code{treeWAS} function documentation, 32 | which can be accessed with: \code{?treeWAS}. 33 | More information can also be found in the treeWAS vignette. 34 | % (see vignette("treeWAS")). 35 | } 36 | \author{ 37 | Caitlin Collins \email{caitiecollins@gmail.com} 38 | } 39 | \keyword{data} 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /man/write.treeWAS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/treeWAS.R 3 | \name{write.treeWAS} 4 | \alias{write.treeWAS} 5 | \title{Write \code{treeWAS} output to a CSV file.} 6 | \usage{ 7 | write.treeWAS(x, filename = "./treeWAS_results") 8 | } 9 | \arguments{ 10 | \item{x}{The output returned by \code{treeWAS}.} 11 | 12 | \item{filename}{A character string containing the path and filename to which the .csv file will be saved; 13 | by default, \code{filename = "./treeWAS_results"} and so 14 | the file would be saved to the current working directory.} 15 | } 16 | \description{ 17 | Save the results of \code{treeWAS} to a CSV file as a summary table of significant findings and scores 18 | (excluding longer data elements within the output). 19 | 
20 | } 21 | \examples{ 22 | ## Example ## 23 | \dontrun{ 24 | ## Load data: 25 | data(snps) 26 | data(phen) 27 | data(tree) 28 | 29 | ## Run treeWAS: 30 | out <- treeWAS(snps, phen, tree, seed = 1) 31 | 32 | ## Save results to home directory: 33 | write.treeWAS(x = out, filename = "~/treeWAS_results") 34 | } 35 | 36 | } 37 | \author{ 38 | Caitlin Collins \email{caitiecollins@gmail.com} 39 | } 40 | -------------------------------------------------------------------------------- /treeWAS.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /vignettes/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/.DS_Store -------------------------------------------------------------------------------- /vignettes/figs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/.DS_Store -------------------------------------------------------------------------------- /vignettes/figs/Eqn_Legend_genotype.JPG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/Eqn_Legend_genotype.JPG -------------------------------------------------------------------------------- /vignettes/figs/Eqn_Legend_genotype.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/Eqn_Legend_genotype.pdf -------------------------------------------------------------------------------- /vignettes/figs/Eqn_Legend_genotype.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/Eqn_Legend_genotype.png -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_phen.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_phen.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_phen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_phen.png -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_phen_rank.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_phen_rank.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_phen_rank.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_phen_rank.png -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_simultaneous.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_simultaneous.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_simultaneous.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_simultaneous.png -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_subsequent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_subsequent.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_subsequent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_subsequent.png -------------------------------------------------------------------------------- /vignettes/figs/plot_hist_terminal.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_terminal.pdf 
-------------------------------------------------------------------------------- /vignettes/figs/plot_hist_terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_terminal.png -------------------------------------------------------------------------------- /vignettes/figs/plot_manhattan_simultaneous.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_simultaneous.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_manhattan_simultaneous.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_simultaneous.png -------------------------------------------------------------------------------- /vignettes/figs/plot_manhattan_subsequent.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_subsequent.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_manhattan_subsequent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_subsequent.png -------------------------------------------------------------------------------- /vignettes/figs/plot_manhattan_terminal.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_terminal.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_manhattan_terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_terminal.png -------------------------------------------------------------------------------- /vignettes/figs/plot_tree.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_tree.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_tree.png -------------------------------------------------------------------------------- /vignettes/figs/plot_tree_parsimony.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_tree_parsimony.pdf -------------------------------------------------------------------------------- /vignettes/figs/plot_tree_parsimony.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_tree_parsimony.png -------------------------------------------------------------------------------- /vignettes/figs/tree_phen_eg.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/tree_phen_eg.pdf -------------------------------------------------------------------------------- /vignettes/figs/tree_phen_eg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/tree_phen_eg.png -------------------------------------------------------------------------------- /vignettes/old/ace.tree.cont.IC.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/ace.tree.cont.IC.pdf -------------------------------------------------------------------------------- /vignettes/old/ace.tree.cont.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/ace.tree.cont.pdf -------------------------------------------------------------------------------- /vignettes/old/ace_example_phen_R_0.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/ace_example_phen_R_0.Rdata -------------------------------------------------------------------------------- /vignettes/old/figsunnamed-chunk-12-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-12-1.pdf -------------------------------------------------------------------------------- /vignettes/old/figsunnamed-chunk-13-1.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-13-1.pdf -------------------------------------------------------------------------------- /vignettes/old/figsunnamed-chunk-14-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-14-1.pdf -------------------------------------------------------------------------------- /vignettes/old/figsunnamed-chunk-15-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-15-1.pdf -------------------------------------------------------------------------------- /vignettes/old/figsunnamed-chunk-16-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-16-1.pdf -------------------------------------------------------------------------------- /vignettes/old/figsunnamed-chunk-17-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-17-1.pdf -------------------------------------------------------------------------------- /vignettes/old/figsunnamed-chunk-7-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-7-1.pdf 
-------------------------------------------------------------------------------- /vignettes/old/phen_cont_skewed.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/phen_cont_skewed.Rdata -------------------------------------------------------------------------------- /vignettes/old/phen_cont_skewed_rank.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/phen_cont_skewed_rank.Rdata -------------------------------------------------------------------------------- /vignettes/old/treeWAS Vignette.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS Vignette.pdf -------------------------------------------------------------------------------- /vignettes/old/treeWAS_example.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | ############# 4 | ## EXAMPLE ## 5 | ############# 6 | 7 | ####################### 8 | ## Clear environment ## 9 | ####################### 10 | ## NOTE TO USER: This step will delete all variables from your environment. 11 | ## You may want to save unsaved variables or skip this step. 
12 | rm(list=ls()) 13 | 14 | ############################## 15 | ## Load sample distribution ## 16 | ############################## 17 | ## (currently using the ClonalFrame Saureus output 18 | ## just so we can see what happens when we 19 | ## simulate data based on this distribution 20 | ## AND then use it to inform treeWAS 21 | ## (as compared to treeWAS's performance with dist=NULL)) 22 | data(dist) 23 | 24 | ################################ 25 | ## Simulate a coalescent tree ## 26 | ################################ 27 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1) 28 | 29 | ####################################################### 30 | ## Simulate a phenotype for individuals in this tree ## 31 | ####################################################### 32 | ## get list of phenotype simulation output 33 | phen.output <- phen.sim(tree, n.subs = 15) 34 | 35 | ## get phenotype for terminal nodes only 36 | phen <- phen.output$phen 37 | 38 | ## get phenotype for all nodes, 39 | ## terminal and internal 40 | phen.nodes <- phen.output$phen.nodes 41 | 42 | ## get the indices of phen.subs (ie. 
branches) 43 | phen.loci <- phen.output$phen.loci 44 | 45 | ################################# 46 | ## Plot Tree showing Phenotype ## 47 | ################################# 48 | phen.plot.colours <- plot.phen(tree = tree, 49 | phen.nodes = phen.nodes, 50 | plot = TRUE) 51 | 52 | ################################################################### 53 | ## Simulate genetic data (SNPs) that fit this tree and phenotype ## 54 | ################################################################### 55 | snps.output <- snp.sim(n.snps = 10000, n.subs=dist, 56 | n.snps.assoc = 10, assoc.prob = 90, 57 | tree = tree, 58 | phen.loci = phen.loci, 59 | heatmap = FALSE, reconstruct = FALSE, 60 | dist.dna.model="JC69", 61 | seed = 1) 62 | snps <- snps.output$snps 63 | snps.assoc <- snps.output$snps.assoc 64 | snps.names <- colnames(snps) 65 | snps.indices <- c(1:ncol(snps)) 66 | 67 | ################################################################################ 68 | ## Note that all previous steps can be performed with this combined function: ## 69 | ################################################################################ 70 | # sim.output <- coalescent.sim(n.ind=100, 71 | # n.snps=10000, n.subs=1, 72 | # n.snps.assoc=10, assoc.prob=90, 73 | # n.phen.subs=15, phen=NULL, 74 | # plot=TRUE, 75 | # heatmap=FALSE, reconstruct=FALSE, 76 | # seed=1) 77 | # snps <- sim.output$snps 78 | # tree <- sim.output$tree 79 | # phen <- sim.output$phen 80 | # snps.assoc <- sim.output$snps.assoc 81 | 82 | 83 | ################# 84 | ## Run treeWAS ## 85 | ################# 86 | 87 | ## First, we'll try treeWAS with dist=NULL 88 | ## (so it will use the default Poisson with parameter 1 to 89 | ## get the number of substitutions per site to simulate) 90 | 91 | treeWAS.output <- treeWAS(snps, phen, n.subs = 1, 92 | tree = tree, 93 | dist.dna.model = NULL, plot.tree = FALSE, 94 | test = "score", 95 | p.value = 0.001, p.value.correct = "bonf", p.value.by = "count", 96 | sim.n.snps = 10000, n.reps = 
1, 97 | plot.null.dist = TRUE, plot.dist = FALSE) 98 | 99 | str(treeWAS.output) 100 | 101 | # out <- treeWAS.output 102 | # corr.dat <- out$corr.dat 103 | # corr.sim <- out$corr.sim 104 | 105 | 106 | ############## 107 | ## EVALUATE ## 108 | ############## 109 | test.positive <- treeWAS.output$sig.snps$SNP.locus 110 | test.negative <- snps.indices[-which(snps.indices %in% test.positive)] 111 | ## get true positives 112 | snps.not <- snps.names[-which(snps.indices %in% snps.assoc)] 113 | true.positive <- test.positive[which(test.positive %in% snps.assoc)] 114 | TP <- length(true.positive) 115 | ## get true negatives 116 | true.negative <- test.negative[which(test.negative %in% snps.not)] 117 | TN <- length(true.negative) 118 | ## get false positives 119 | false.positive <- test.positive[which(test.positive %in% snps.not)] 120 | FP <- length(false.positive) 121 | ## get false negatives 122 | false.negative <- test.negative[which(test.negative %in% snps.assoc)] 123 | FN <- length(false.negative) 124 | 125 | 126 | ################# 127 | ## sensitivity ## 128 | ################# 129 | ## ie. How many truly ASSOCIATED SNPs did you manage to catch 130 | ## ~ Pr(Positive Test | SNP ASSOCIATED) 131 | ## --> Set 1: will be 0/0 = NaN 132 | sensitivity <- (TP / (TP + FN)) 133 | sensitivity 134 | ################# 135 | ## specificity ## 136 | ################# 137 | ## ie. Of all the truly NOT associated SNPs, how many did you manage to rule out? 138 | ## ~ Pr(Negative Test | SNP NOT associated) 139 | specificity <- (TN / (TN + FP)) ## = (1 - FPR) 140 | specificity 141 | ######### 142 | ## PPV ## 143 | ######### 144 | ## ie. 
Of all the POSITIVE calls you made, how many were CORRECT/ identified truly ASSOCIATED SNPs 145 | ## ~ Pr(SNP ASSOCIATED | Positive Test) 146 | ## --> Set 1: will be 0 (UNLESS you made NO positive calls, then 0/0 = NaN) 147 | PPV <- (TP / (TP + FP)) ## = (1 - FDR) 148 | PPV 149 | 150 | 151 | ################# ################# ################# ################# 152 | 153 | ## COMPARE TO: ## 154 | 155 | ################# 156 | ## Run treeWAS ## 157 | ################# 158 | 159 | ## Second, we can try treeWAS with dist=dist 160 | ## (where dist comes from the .Rdata file loaded just before we ran coalescent.sim) 161 | ## (so it will use the true distribution to 162 | ## identify the number of substitutions per site to simulate) 163 | 164 | treeWAS.output2 <- treeWAS(snps, phen, n.subs = dist, 165 | tree = tree, 166 | dist.dna.model = NULL, plot.tree = FALSE, 167 | test = "score", 168 | p.value = 0.001, p.value.correct = "bonf", p.value.by = "count", 169 | sim.n.snps = 10000, n.reps = 1, 170 | plot.null.dist = TRUE, plot.dist = FALSE) 171 | 172 | str(treeWAS.output2) 173 | 174 | ############## 175 | ## EVALUATE ## 176 | ############## 177 | test.positive <- treeWAS.output2$sig.snps$SNP.locus 178 | test.negative <- snps.indices[-which(snps.indices %in% test.positive)] 179 | ## get true positives 180 | snps.not <- snps.names[-which(snps.indices %in% snps.assoc)] 181 | true.positive <- test.positive[which(test.positive %in% snps.assoc)] 182 | TP <- length(true.positive) 183 | ## get true negatives 184 | true.negative <- test.negative[which(test.negative %in% snps.not)] 185 | TN <- length(true.negative) 186 | ## get false positives 187 | false.positive <- test.positive[which(test.positive %in% snps.not)] 188 | FP <- length(false.positive) 189 | ## get false negatives 190 | false.negative <- test.negative[which(test.negative %in% snps.assoc)] 191 | FN <- length(false.negative) 192 | 193 | ################# 194 | ## sensitivity ## 195 | ################# 196 | ## ie. 
How many truly ASSOCIATED SNPs did you manage to catch 197 | ## ~ Pr(Positive Test | SNP ASSOCIATED) 198 | ## --> Set 1: will be 0/0 = NaN 199 | sensitivity <- (TP / (TP + FN)) 200 | sensitivity 201 | ################# 202 | ## specificity ## 203 | ################# 204 | ## ie. Of all the truly NOT associated SNPs, how many did you manage to rule out? 205 | ## ~ Pr(Negative Test | SNP NOT associated) 206 | specificity <- (TN / (TN + FP)) ## = (1 - FPR) 207 | specificity 208 | ######### 209 | ## PPV ## 210 | ######### 211 | ## ie. Of all the POSITIVE calls you made, how many were CORRECT/ identified truly ASSOCIATED SNPs 212 | ## ~ Pr(SNP ASSOCIATED | Positive Test) 213 | ## --> Set 1: will be 0 (UNLESS you made NO positive calls, then 0/0 = NaN) 214 | PPV <- (TP / (TP + FP)) ## = (1 - FDR) 215 | PPV 216 | 217 | ################# ################# ################# ################# 218 | 219 | ## COMPARE TO: ## 220 | 221 | ################# 222 | ## Run treeWAS ## 223 | ################# 224 | 225 | ## Third, we can try treeWAS with dist=NULL 226 | ## So we will use the Fitch parsimony functions from R pkg phangorn 227 | ## (reconfigured for our purposes in treeWAS function get.fitch.n.mts) 228 | ## to reconstruct the distribution of n.subs-per-site from the snps data and tree. 

## Run treeWAS with n.subs = NULL (see the call below).
treeWAS.output3 <- treeWAS(snps, phen, n.subs = NULL,
                           tree = tree,
                           dist.dna.model = NULL, plot.tree = FALSE,
                           test = "score",
                           p.value = 0.001, p.value.correct = "bonf", p.value.by = "count",
                           sim.n.snps = 10000, n.reps = 1,
                           plot.null.dist = TRUE, plot.dist = FALSE)

str(treeWAS.output3)

##############
## EVALUATE ##
##############
## SNPs called significant by this run
test.positive <- treeWAS.output3$sig.snps$SNP.locus
## All remaining SNPs = test negatives.
## NB: logical indexing is used instead of x[-which(...)]: when nothing matches,
## which() returns integer(0) and x[-integer(0)] is EMPTY (not all of x),
## which would leave NO test negatives whenever no positive calls were made.
test.negative <- snps.indices[!(snps.indices %in% test.positive)]
## SNPs that are truly NOT associated
## (same pitfall as above if snps.assoc is empty, eg. no associated SNPs simulated)
## NOTE(review): snps.not is drawn from snps.names while the test sets come from
## snps.indices -- confirm that names and indices are interchangeable here.
snps.not <- snps.names[!(snps.indices %in% snps.assoc)]
## get true positives
true.positive <- test.positive[which(test.positive %in% snps.assoc)]
TP <- length(true.positive)
## get true negatives
true.negative <- test.negative[which(test.negative %in% snps.not)]
TN <- length(true.negative)
## get false positives
false.positive <- test.positive[which(test.positive %in% snps.not)]
FP <- length(false.positive)
## get false negatives
false.negative <- test.negative[which(test.negative %in% snps.assoc)]
FN <- length(false.negative)

#################
## sensitivity ##
#################
## ie. How many truly ASSOCIATED SNPs did you manage to catch?
## ~ Pr(Positive Test | SNP ASSOCIATED)
## --> Set 1: will be 0/0 = NaN
sensitivity <- (TP / (TP + FN))
sensitivity
#################
## specificity ##
#################
## ie. Of all the truly NOT associated SNPs, how many did you manage to rule out?
## ~ Pr(Negative Test | SNP NOT associated)
specificity <- (TN / (TN + FP)) ## = (1 - FPR)
specificity
#########
## PPV ##
#########
## ie.
Of all the POSITIVE calls you made, how many were CORRECT/ identified truly ASSOCIATED SNPs 278 | ## ~ Pr(SNP ASSOCIATED | Positive Test) 279 | ## --> Set 1: will be 0 (UNLESS you made NO positive calls, then 0/0 = NaN) 280 | PPV <- (TP / (TP + FP)) ## = (1 - FDR) 281 | PPV 282 | -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-20-1.png 
-------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-8-1.png --------------------------------------------------------------------------------