├── .DS_Store
├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── LICENSE.txt
├── NAMESPACE
├── R
    ├── beeswarm.plot.R
    ├── coalescent.sim.R
    ├── coalescent.tree.sim.R
    ├── data.R
    ├── fitch.R
    ├── fwd.coalescent.sim.R
    ├── fwd.phen.sim.R
    ├── fwd.plot.prob.phen.R
    ├── fwd.snp.sim.R
    ├── get.sig.snps.R
    ├── heatmap.DNAbin.R
    ├── pair.tests.R
    ├── phen.sim.R
    ├── plot.phen.R
    ├── plot.sig.snps.R
    ├── readCFML.R
    ├── reconstruct.R
    ├── simTest.R
    ├── simultaneous.test.R
    ├── simultaneous.test.epi.R
    ├── snp.sim.Q.R
    ├── snp.sim.Q_old.R
    ├── snp.sim.R
    ├── subsequent.test.R
    ├── terminal.test.R
    ├── terminal.test.epi.R
    ├── tree.reconstruct.R
    ├── treeWAS.R
    └── utils.R
├── README.md
├── data
    ├── dist_0.01.rda
    ├── dist_0.05.rda
    ├── dist_0.1.rda
    ├── dist_0.2.rda
    ├── dist_0.25.rda
    ├── dist_0.rda
    ├── phen.cont.rank.rda
    ├── phen.cont.rda
    ├── phen.plot.col.rda
    ├── phen.rda
    ├── phen.reconstruction.rda
    ├── snps.assoc.rda
    ├── snps.rda
    ├── snps.reconstruction.rda
    ├── tree.rda
    └── treeWAS.example.out.rda
├── inst
    └── CITATION
├── man
    ├── asr.Rd
    ├── assoc.test.Rd
    ├── beeswarmPlot.Rd
    ├── coalescent.sim.Rd
    ├── coalescent.tree.sim.Rd
    ├── dist_0.01.Rd
    ├── dist_0.05.Rd
    ├── dist_0.1.Rd
    ├── dist_0.2.Rd
    ├── dist_0.25.Rd
    ├── dist_0.Rd
    ├── fwd.coalescent.sim.Rd
    ├── fwd.phen.sim.Rd
    ├── fwd.snp.sim.Rd
    ├── get.ancestral.pars.Rd
    ├── get.assoc.scores.Rd
    ├── get.binary.snps.Rd
    ├── get.fitch.n.mts.Rd
    ├── get.original.loci.Rd
    ├── get.score3.Rd
    ├── get.sig.snps.Rd
    ├── get.tip.order.Rd
    ├── get.unique.matrix.Rd
    ├── ggplotbg.Rd
    ├── heatmap.DNAbin.Rd
    ├── keepFirstN.Rd
    ├── keepLastN.Rd
    ├── manhattan.plot.Rd
    ├── memfree.Rd
    ├── pair.tests.Rd
    ├── phen.Rd
    ├── phen.cont.Rd
    ├── phen.cont.rank.Rd
    ├── phen.plot.col.Rd
    ├── phen.reconstruction.Rd
    ├── phen.sim.Rd
    ├── plot_phen.Rd
    ├── plot_prob_phen.Rd
    ├── plot_sig_snps.Rd
    ├── print.treeWAS.Rd
    ├── read.CFML.Rd
    ├── removeFirstN.Rd
    ├── removeLastN.Rd
    ├── selectBiallelicSNP.Rd
    ├── set.args.Rd
    ├── simTest.Rd
    ├── simultaneous.test.Rd
    ├── simultaneous.test.epi.Rd
    ├── snp.sim.Q.Rd
    ├── snp.sim.Rd
    ├── snps.Rd
    ├── snps.assoc.Rd
    ├── snps.reconstruction.Rd
    ├── subsequent.test.Rd
    ├── table.matrix.Rd
    ├── terminal.test.Rd
    ├── terminal.test.epi.Rd
    ├── tree.Rd
    ├── tree.reconstruct.Rd
    ├── treeWAS.Rd
    ├── treeWAS.example.out.Rd
    └── write.treeWAS.Rd
├── treeWAS.Rproj
└── vignettes
    ├── .DS_Store
    ├── figs
        ├── .DS_Store
        ├── Eqn_Legend_genotype.JPG
        ├── Eqn_Legend_genotype.pdf
        ├── Eqn_Legend_genotype.png
        ├── plot_hist_phen.pdf
        ├── plot_hist_phen.png
        ├── plot_hist_phen_rank.pdf
        ├── plot_hist_phen_rank.png
        ├── plot_hist_simultaneous.pdf
        ├── plot_hist_simultaneous.png
        ├── plot_hist_subsequent.pdf
        ├── plot_hist_subsequent.png
        ├── plot_hist_terminal.pdf
        ├── plot_hist_terminal.png
        ├── plot_manhattan_simultaneous.pdf
        ├── plot_manhattan_simultaneous.png
        ├── plot_manhattan_subsequent.pdf
        ├── plot_manhattan_subsequent.png
        ├── plot_manhattan_terminal.pdf
        ├── plot_manhattan_terminal.png
        ├── plot_tree.pdf
        ├── plot_tree.png
        ├── plot_tree_parsimony.pdf
        ├── plot_tree_parsimony.png
        ├── tree_phen_eg.pdf
        └── tree_phen_eg.png
    ├── old
        ├── README_10_07_2017.md
        ├── ace.tree.cont.IC.pdf
        ├── ace.tree.cont.pdf
        ├── ace_example.R
        ├── ace_example_phen_R_0.Rdata
        ├── figsunnamed-chunk-12-1.pdf
        ├── figsunnamed-chunk-13-1.pdf
        ├── figsunnamed-chunk-14-1.pdf
        ├── figsunnamed-chunk-15-1.pdf
        ├── figsunnamed-chunk-16-1.pdf
        ├── figsunnamed-chunk-17-1.pdf
        ├── figsunnamed-chunk-7-1.pdf
        ├── pagel_example.R
        ├── phen_cont_skewed.Rdata
        ├── phen_cont_skewed_rank.Rdata
        ├── score3_output_example.R
        ├── treeWAS Vignette.pdf
        ├── treeWAS_example.R
        └── treeWAS_vignette_files
        │   └── figure-markdown_strict
        │       ├── unnamed-chunk-10-1.png
        │       ├── unnamed-chunk-12-1.png
        │       ├── unnamed-chunk-19-1.png
        │       ├── unnamed-chunk-20-1.png
        │       ├── unnamed-chunk-21-1.png
        │       ├── unnamed-chunk-22-1.png
        │       ├── unnamed-chunk-23-1.png
        │       ├── unnamed-chunk-24-1.png
        │       ├── unnamed-chunk-5-1.png
        │       └── unnamed-chunk-8-1.png
    ├── treeWAS Vignette_files
        └── MathJax.js
    ├── treeWAS_vignette.Rmd
    └── treeWAS_vignette.html


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/.DS_Store


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^Meta$
2 | ^doc$
3 | ^.*\.Rproj$
4 | ^\.Rproj\.user$
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Meta
2 | doc
3 | .Rproj.user
4 | .Rhistory
5 | .RData
6 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: treeWAS
 2 | Title: Phylogenetic tree-based microbial GWAS
 3 | Version: 1.0
 4 | Authors@R: c(person("Caitlin", "Collins", email="caitiecollins@gmail.com", role = c("aut", "cre")),person("Xavier", "Didelot",email = "xavier.didelot@gmail.com",role = c("aut")))
 5 | Maintainer: Caitlin Collins <caitiecollins@gmail.com>
 6 | Description: Perform microbial GWAS using phylogenetic trees to correct for population structure and recombination.
 7 | Year: 2018
 8 | License: GPL (>=2)
 9 | LazyData: true
10 | RoxygenNote: 7.2.3
11 | Depends: R (>= 3.0.0), 
12 |         adegenet, 
13 |         ape
14 | Imports: ade4, 
15 |         beeswarm, 
16 |         ggplot2,
17 | 		graphics,
18 | 		grid,
19 |         Hmisc, 
20 |         knitr,
21 |         phangorn, 
22 |         phytools, 
23 | 		png,
24 |         pryr, 
25 | 		rmarkdown,
26 |         scales,
27 | 		stats
28 | VignetteBuilder: knitr
29 | Encoding: UTF-8
30 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | treeWAS: A Phylogenetic Tree-Based Tool for Genome-Wide Association Studies in Microbes
 2 | Copyright (C) 2017  Caitlin Collins
 3 | 
 4 | 
 5 | This program is free software: you can redistribute it and/or modify
 6 | it under the terms of the GNU General Public License as published by
 7 | the Free Software Foundation, either version 3 of the License, or
 8 | (at your option) any later version.
 9 | 
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | GNU General Public License for more details.
14 | 
15 | You should have received a copy of the GNU General Public License
16 | along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | # Generated by roxygen2: do not edit by hand
  2 | 
  3 | S3method(print,treeWAS)
  4 | export(.getFixed)
  5 | export(.is.even)
  6 | export(.is.integer0)
  7 | export(.is.odd)
  8 | export(.substrLeft)
  9 | export(.substrRight)
 10 | export(.switch.phen)
 11 | export(asr)
 12 | export(assoc.test)
 13 | export(beeswarmPlot)
 14 | export(coalescent.sim)
 15 | export(coalescent.tree.sim)
 16 | export(fwd.coalescent.sim)
 17 | export(fwd.phen.sim)
 18 | export(fwd.snp.sim)
 19 | export(get.ancestral.pars)
 20 | export(get.assoc.scores)
 21 | export(get.binary.snps)
 22 | export(get.fitch.n.mts)
 23 | export(get.original.loci)
 24 | export(get.score3)
 25 | export(get.sig.snps)
 26 | export(get.tip.order)
 27 | export(get.unique.matrix)
 28 | export(ggplotbg)
 29 | export(heatmap.DNAbin)
 30 | export(keepFirstN)
 31 | export(keepLastN)
 32 | export(manhattan.plot)
 33 | export(memfree)
 34 | export(pair.tests)
 35 | export(phen.sim)
 36 | export(plot_phen)
 37 | export(plot_prob_phen)
 38 | export(plot_sig_snps)
 39 | export(read.CFML)
 40 | export(removeFirstN)
 41 | export(removeLastN)
 42 | export(selectBiallelicSNP)
 43 | export(set.args)
 44 | export(simTest)
 45 | export(simultaneous.test)
 46 | export(simultaneous.test.epi)
 47 | export(snp.sim)
 48 | export(snp.sim.Q)
 49 | export(subsequent.test)
 50 | export(table.matrix)
 51 | export(terminal.test)
 52 | export(terminal.test.epi)
 53 | export(tree.reconstruct)
 54 | export(treeWAS)
 55 | export(write.treeWAS)
 56 | import(adegenet)
 57 | import(ape, except = zoom)
 58 | import(ggplot2)
 59 | importFrom(Hmisc,all.is.numeric)
 60 | importFrom(ade4,dudi.pca)
 61 | importFrom(adegenet,transp)
 62 | importFrom(ape,read.dna)
 63 | importFrom(beeswarm,beeswarm)
 64 | importFrom(grDevices,col2rgb)
 65 | importFrom(grDevices,dev.off)
 66 | importFrom(grDevices,heat.colors)
 67 | importFrom(grDevices,pdf)
 68 | importFrom(grDevices,rgb)
 69 | importFrom(graphics,arrows)
 70 | importFrom(graphics,axis)
 71 | importFrom(graphics,barplot)
 72 | importFrom(graphics,box)
 73 | importFrom(graphics,hist)
 74 | importFrom(graphics,image)
 75 | importFrom(graphics,lines)
 76 | importFrom(graphics,mtext)
 77 | importFrom(graphics,par)
 78 | importFrom(graphics,plot.new)
 79 | importFrom(graphics,points)
 80 | importFrom(graphics,rect)
 81 | importFrom(graphics,text)
 82 | importFrom(graphics,title)
 83 | importFrom(phangorn,acctran)
 84 | importFrom(phangorn,ancestral.pml)
 85 | importFrom(phangorn,as.phyDat)
 86 | importFrom(phangorn,fitch)
 87 | importFrom(phangorn,midpoint)
 88 | importFrom(phangorn,pace)
 89 | importFrom(phangorn,phyDat)
 90 | importFrom(phangorn,pml)
 91 | importFrom(phangorn,pratchet)
 92 | importFrom(phytools,anc.ML)
 93 | importFrom(phytools,fastAnc)
 94 | importFrom(pryr,mem_used)
 95 | importFrom(pryr,object_size)
 96 | importFrom(scales,rescale)
 97 | importFrom(stats,anova)
 98 | importFrom(stats,as.formula)
 99 | importFrom(stats,chisq.test)
100 | importFrom(stats,cor)
101 | importFrom(stats,density)
102 | importFrom(stats,dist)
103 | importFrom(stats,ecdf)
104 | importFrom(stats,fisher.test)
105 | importFrom(stats,ftable)
106 | importFrom(stats,glm)
107 | importFrom(stats,lm)
108 | importFrom(stats,mantelhaen.test)
109 | importFrom(stats,p.adjust)
110 | importFrom(stats,quantile)
111 | importFrom(stats,residuals)
112 | importFrom(stats,rexp)
113 | importFrom(stats,rnorm)
114 | importFrom(stats,rpois)
115 | importFrom(utils,combn)
116 | importFrom(utils,str)
117 | importFrom(utils,write.csv)
118 | importFrom(utils,write.table)
119 | 


--------------------------------------------------------------------------------
/R/coalescent.tree.sim.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #########################
  4 | ## coalescent.tree.sim ##
  5 | #########################
  6 | 
  7 | 
  8 | ########################################################################
  9 | 
 10 | ###################
 11 | ## DOCUMENTATION ##
 12 | ###################
 13 | 
 14 | #' Short one-phrase description.
 15 | #'
 16 | #' Longer proper description of function...
 17 | #'
 18 | #' @param n.ind An integer specifying the number of terminal nodes desired.
 19 | #' @param seed An optional integer controlling the pseudo-random process underlying the tree generation.
 20 | #'
 21 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 22 | #' @examples
 23 | #'
 24 | #' ## basic use of fn
 25 | #' tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
 26 | #'
 27 | #' ## plot output
 28 | #' plot(tree)
 29 | #'
 30 | #' @rawNamespace import(ape, except = zoom)
 31 | #' @importFrom phangorn midpoint
 32 | #'
 33 | #' @export
 34 | 
 35 | ########################################################################
 36 | #  @useDynLib phangorn, .registration = TRUE
 37 | 
 38 | 
 39 | 
 40 | coalescent.tree.sim <- function(n.ind=100, seed=NULL){
 41 | 
 42 |   if(!is.null(seed)) set.seed(seed)
 43 | 
 44 |   n.nodes <- n.ind + (n.ind-1) # total n.nodes (internal, external)
 45 |   inds <- c(1:n.ind) # terminal nodes
 46 |   nodes <- rev(c((n.ind+1):n.nodes)) # internal nodes
 47 |   tree.params <- list() # to store and update output
 48 | 
 49 | 
 50 |   for(i in 1:(length(inds)-1)){
 51 |     ## get inds.ori from last generation:
 52 |     if(i==1){
 53 |       inds.ori <- inds
 54 |     }else{
 55 |       inds.ori <- tree.params[[(i-1)]][["inds.remaining"]]
 56 |     }
 57 |     ## get N, the number of individuals (remaining at this generation)
 58 |     ## from which a random 2 are to be selected for coalescence
 59 |     N <- length(inds.ori)
 60 | 
 61 |     ###################
 62 |     ## BRANCH LENGTH ##
 63 |     ###################
 64 |     ## get lamda, the parameter of the exponential distribution,
 65 |     ## given the number of individuals at this generation
 66 |     lambda <- (N*(N-1)) / 2
 67 |     ## draw x, the length of time to coalescence at this generation
 68 |     x <- rexp(n=1, rate=lambda)
 69 | 
 70 |     #####################
 71 |     ## COALESCENT PAIR ##
 72 |     #####################
 73 |     ## get co.pair, the 2 inds to coalesce at this generation
 74 |     co.pair <- sample(inds.ori, 2)
 75 |     ## merge these 2 inds, replace with new internal node,
 76 |     ## update the list of inds to sample at the next generation
 77 |     inds.remaining <- c(inds.ori[-which(inds.ori %in% co.pair)], nodes[i])
 78 | 
 79 |     ############
 80 |     ## OUTPUT ##
 81 |     ############
 82 |     ## store the output in the ith element of our list tree.params:
 83 |     tree.params[[i]] <- list()
 84 |     tree.params[[i]][[1]] <- x
 85 |     tree.params[[i]][[2]] <- co.pair
 86 |     tree.params[[i]][[3]] <- inds.ori
 87 |     tree.params[[i]][[4]] <- inds.remaining
 88 |     names(tree.params[[i]]) <- c("Time", "co.pair", "inds.ori", "inds.remaining")
 89 |   } # end for loop
 90 | 
 91 | 
 92 | 
 93 | 
 94 |   ## get edge.list
 95 |   to <- as.vector(unlist(sapply(c(1:length(tree.params)),
 96 |                                 function(e) tree.params[[e]][["co.pair"]])))
 97 |   from <- as.vector(unlist(sapply(c(1:length(nodes)),
 98 |                                   function(e) rep(nodes[e], 2))))
 99 |   edge.list <- data.frame(from,to)
100 | 
101 |   ## get edge lengths
102 |   times <- as.vector(unlist(sapply(c(1:length(tree.params)),
103 |                                    function(e) tree.params[[e]][["Time"]])))
104 | 
105 |   ## make empty edge.lengths vector to store output below:
106 |   edge.lengths <- NA
107 | 
108 |   ## for all the edges in our edge.list data.frame:
109 |   for(i in 1:nrow(edge.list)){
110 |     if(edge.list$to[i] %in% inds){
111 |       ## if downstream node = terminal, sum all time intervals til ancestor.
112 |       edge.lengths[i] <- sum(times[1:which(nodes==edge.list$from[i])])
113 |     }else{
114 |       ## BUT, if the downstream node = internal, must subtract time btw.
115 |       ## downstream node and final generation.
116 |       length.total <- sum(times[1:which(nodes==edge.list$from[i])])
117 |       length.toRemove <- sum(times[1:which(nodes==edge.list$to[i])])
118 |       edge.lengths[i] <- length.total - length.toRemove
119 |     }
120 |   } # end for loop
121 | 
122 |   ## convert edge.list to matrix
123 |   edge.list <- as.matrix(edge.list)
124 |   colnames(edge.list) <- NULL
125 |   dimnames(edge.list) <- NULL
126 | 
127 |   ## put output into tree list (phylo format):
128 |   tree <- list()
129 |   tree$edge <- edge.list
130 |   tree$tip.label <- c(1:n.ind)
131 |   tree$edge.length <- edge.lengths
132 |   tree$Nnode <- as.integer(n.ind - 1)
133 | 
134 |   ## change class by force
135 |   class(tree) <- "phylo"
136 |   ## return tree in pruningwise order:
137 |   tree <- reorder.phylo(tree, order="pruningwise")
138 |   ## root tree:
139 |   if(!is.rooted(tree)) tree <- midpoint(tree)
140 | 
141 |   return(tree)
142 | 
143 | } # end coalescent.tree.sim
144 | 


--------------------------------------------------------------------------------
/R/fitch.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #####################
  3 | ## get.fitch.n.mts ##
  4 | #####################
  5 | ## phangorn-based fitch fn
  6 | 
  7 | ########################################################################
  8 | 
  9 | ###################
 10 | ## DOCUMENTATION ##
 11 | ###################
 12 | 
 13 | #' Calculate parsimony scores.
 14 | #'
 15 | #' Determine parsimony scores for all genetic loci, or a phenotypic variable, along a given tree.
 16 | #' An extension of the fitch function available in package phangorn.
 17 | #'
 18 | #' @param x A numeric matrix or vector containing two unique values with row.names matching tree tip.labels.
 19 | #' @param tree A phylo object.
 20 | #'
 21 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 22 | #'
 23 | #' @examples
 24 | #' \dontrun{
 25 | #'
 26 | #' ## generate a tree
 27 | #' tree <- ape::rtree(100)
 28 | #' ## generate snps, a matrix of 0s and 1s
 29 | #' snps <- matrix(sample(c(0,1),100000,TRUE), nrow=100)
 30 | #' row.names(snps) <- tree$tip.label
 31 | #'
 32 | #' ## run function
 33 | #' out <- get.fitch.n.mts(x=snps, tree)
 34 | #'
 35 | #' ## examine output
 36 | #' str(out)
 37 | #' table(out)
 38 | #' hist(out)
 39 | #' }
 40 | #'
 41 | #' @importFrom phangorn fitch
 42 | #' @importFrom phangorn as.phyDat
 43 | #'
 44 | #' @export
 45 | 
 46 | ########################################################################
 47 | # @useDynLib phangorn, .registration = TRUE
 48 | 
 49 | 
 50 | get.fitch.n.mts <- function(x, tree, snps=NULL){
 51 | 
 52 |   ## load packages
 53 |   # require(phangorn)
 54 | 
 55 |   ## Re-coding snps as x (to allow for phen/vectors).
 56 |   ## --> snps now deprecated:
 57 |   X <- NULL
 58 |   if(!missing(x)){
 59 |     X <- x
 60 |     if(!is.null(snps) & !is.null(x)){
 61 |       warning("As 'x' is specified, we ignore the 'snps' argument. \n
 62 |               (In get.fitch.n.mts the 'snps' argument has now been replaced by an argument named 'x'.)")
 63 |     }
 64 |   }else{
 65 |     if(!is.null(snps)){
 66 |       X <- snps
 67 |     }
 68 |   }
 69 |   ## If ONE of x or snps was specified, continue; else, stop:
 70 |   if(!is.null(X)){
 71 |     x <- X
 72 |   }else{
 73 |     stop("'x' must be specified.")
 74 |   }
 75 | 
 76 |   ## checks
 77 |   ## do not include NA as a level:
 78 |   levs <- unique(as.vector(x[!is.na(x)]))
 79 |   if((!is.numeric(x) & !is.logical(x)) | length(levs[!is.na(levs)])!=2){
 80 |     stop("x must be a numeric matrix or vector, with two unique values, excluding NAs
 81 |          (though we recommend that NAs be in the minority for each column).\n")
 82 |   }
 83 |   # levs <- unique(as.vector(x))
 84 |   if(any(is.na(levs))){
 85 |     if(is.matrix(x)){
 86 |       nnas <- sapply(c(1:ncol(x)), function(e) length(which(is.na(x[,e])))/nrow(x))
 87 |       toRemove <- which(nnas > 0.5)
 88 |       if(length(toRemove) > 0){
 89 |         cat(length(toRemove), "snps columns are over 50% NAs.
 90 |             You may want to remove these columns as they are unlikely to be significant
 91 |             and can generate inappropriate inferences during ancestral state reconstruction.\n")
 92 |       }
 93 |     }else{
 94 |       nnas <- length(which(is.na(x)))/length(x)
 95 |       # toRemove <- which(nnas > 0.5)
 96 |       if(nnas > 0.5){
 97 |         cat("x is over 50% NAs.
 98 |             This may generate inappropriate inferences during ancestral state reconstruction.\n")
 99 |       }
100 |     }
101 |   }
102 | 
103 |   x.levels <- sort(levs, na.last = TRUE)
104 |   ## returns only unique patterns...
105 |   ## *use levels=states (eg. c(0,1)), but keep NAs in x and use ambiguity=NA
106 |   ## to allow NAs without counting them twd parsimony score values.
107 |   x.phyDat <- phangorn::as.phyDat(as.matrix(x),
108 |                            type="USER", levels=x.levels, ambiguity=NA)
109 |   ## get index of all original x columns to map to unique pattern
110 |   index <- attr(x.phyDat, "index")
111 | 
112 |   ## get parsimony score for all unique patterns in x
113 |   ## NB: For phangorn::fitch, x data must be of class phyDat
114 |   fitch.unique <- phangorn::fitch(tree, x.phyDat, site="site")
115 |   # table(fitch.unique)
116 | 
117 |   ## get score for all original sites
118 |   fitch.complete <- fitch.unique[index]
119 |   return(fitch.complete)
120 | } # end get.fitch.n.mts
121 | 
122 | 
123 | 


--------------------------------------------------------------------------------
/R/fwd.coalescent.sim.R:
--------------------------------------------------------------------------------
  1 | 
  2 | ########################
  3 | ## fwd.coalescent.sim ##
  4 | ########################
  5 | 
  6 | ## a function for simulating trees under a fully-linked coalescent model.
  7 | ## optional simulation of a phenotype and phenotypically-associated SNPs is implemented.
  8 | ## optional use of a distribution to guide the substitution rate of the non-associated SNPs is implemented.
  9 | 
 10 | ## TO DO:
 11 | ## 1) (Re-)implement associated SNP randomization procedure...
 12 | ## want to implement procedures that combine the above options...
 13 | ## 2) Allow phenotypically-associated SNPs simulation to be optionally guided
 14 | ## by a user-inputted phenotype for the terminal nodes (--> would need to simulate
 15 | ## phenotypic substitutions from the terminal nodes UP to the root, the reverse
 16 | ## of the current procedure...)
 17 | ## 3) Implement assoc.options (currently using deprecated "all" option without requiring argument,
 18 | ## but would like to consider implementing alternative "model" option(s))
 19 | 
 20 | 
 21 | ## ARGUMENTS ##
 22 | # n.ind <- 10 # n.genomes you want to end up with
 23 | # gen.size <- 1000000 # bases
 24 | # theta <- gen.size*2 # (if sim.by=="branch")# OR # 1*2 # (if sim.by=="locus")
 25 | # biallelic <- TRUE # if TRUE, select ONLY complementary nt; if FALSE,
 26 | #                select from 3 alternatives (ie. A/C/G/T-current nt)
 27 | # seed <- 1 # allow user to control randomization to get reproducible results.
 28 | # n.snps.assoc <- 5
 29 | # assoc.option <- c("all", "model") # deprecated (only "all" available)
 30 | # sim.by <- c("locus", "branch") # deprecated (only "locus" has all current protocols implemented)
 31 | 
 32 | 
 33 | ## EXAMPLE ##
 34 | # out <- coalescent.sim(n.ind=100, gen.size=10000, sim.by="locus",
 35 | #                       theta=1*2, dist=NULL,
 36 | #                       theta_p=15, phen=NULL,
 37 | #                       n.snps.assoc=20, assoc.option="all", assoc.prob=90,
 38 | #                       haploid=TRUE, biallelic=TRUE, seed=1,
 39 | #                       plot=TRUE, heatmap=FALSE, plot2="UPGMA")
 40 | 
 41 | ########################################################################
 42 | 
 43 | ###################
 44 | ## DOCUMENTATION ##
 45 | ###################
 46 | 
 47 | #' Short one-phrase description.
 48 | #'
 49 | #' Longer proper description of function...
 50 | #'
 51 | #' @param n.ind An integer specifying the number of individual genomes to simulate
 52 | #' (ie. the number of terminal nodes in the tree).
 53 | #' @param n.snps An integer specifying the number of genetic loci to simulate.
 54 | #' @param n.subs Either an integer or a vector (containing a distribution) that is
 55 | #' used to determine the number of substitutions
 56 | #' to occur on the phylogenetic tree for each genetic locus (see details).
 57 | #' @param n.snps.assoc An optional integer specifying the number of genetic loci to be simulated in association with the phenotype.
 58 | #' @param assoc.prob An optional integer (> 0, <= 100) specifying the strength of the
 59 | #' association between the n.snps.assoc loci and the phenotype (see details).
 60 | #' @param n.phen.subs An integer specifying the expected number of phenotypic
 61 | #' substitutions to occur on the phylogenetic tree (through the same process as
 62 | #' the n.subs parameter when n.subs is an integer (see details)).
 63 | #' @param phen An optional vector containing a phenotype for each of the
 64 | #' n.ind individuals if no phenotypic simulation is desired.
 65 | #' @param heatmap A logical indicating whether to produce a heatmap of the genetic distance
 66 | #' between the simulated genomes of the n.ind individuals.
 67 | #' @param reconstruct Either a logical indicating whether to attempt to reconstruct
 68 | #' a phylogenetic tree using the simulated genetic data, or one of c("UPGMA", "nj", "ml")
 69 | #' to specify that tree reconstruction is desired by one of these three methods
 70 | #' (Unweighted Pair Group Method with Arithmetic Mean, Neighbour-Joining, Maximum-Likelihood).
 71 | #' @param seed An optional integer controlling the pseudo-random process of simulation. Two
 72 | #' instances of coalescent.sim with the same seed and arguments will produce identical output.
 73 | #'
 74 | #' @details #### n.subs ####
 75 | #' If the value of the n.subs parameter is set to an integer, this integer is
 76 | #' used as the parameter of a Poisson distribution from which the number of substitutions to
 77 | #' occur on the phylogenetic tree is drawn for each of the n.snps simulated genetic loci.
 78 | #' If n.subs is a vector containing a distribution, this is used directly (in proportion to n.snps)
 79 | #' to define the number of substitutions per site. For example, if n.subs=c(3000, 900, 70, 20, 0, 10)
 80 | #' and n.snps=8000, then 6000 simulated sites will undergo exactly
 81 | #' one substitution somewhere on the phylogenetic tree, 1800 will undergo two,
 82 | #' 140 three, 40 four, 0 five, and 20 six.
 83 | #' #### assoc.prob ####
 84 | #' The assoc.prob parameter controls the strength of association through a process analogous to dilution.
 85 | #' All n.snps.assoc loci are initially simulated to undergo a substitution
 86 | #' every time the phenotype undergoes a substitution (ie. perfect association).
 87 | #' The assoc.prob parameter then acts like a dilution factor, removing (100 - assoc.prob)%
 88 | #' of the substitutions that occurred during simulation under perfect association.
 89 | #'
 90 | #'
 91 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 92 | #' @export
 93 | #'
 94 | #' @import adegenet
 95 | #' @rawNamespace import(ape, except = zoom)
 96 | 
 97 | ########################################################################
 98 | 
 99 | ############
100 | ## NOTES: ##
101 | ############
102 | ## theta_p changed to n.phen.subs (and just n.subs in phen.sim.R)
103 | 
104 | 
105 | 
fwd.coalescent.sim <- function(n.ind=100,
                               n.snps=10000, n.subs=1,
                               n.snps.assoc=10, n.subs.assoc=15,
                               p=1,
                               heatmap=FALSE, reconstruct=FALSE,
                               dist.dna.model="JC69",
                               seed=1){
  ## Simulate a tree (coalescent.tree.sim), then SNPs along it (fwd.snp.sim),
  ## then a phenotype derived from the associated SNPs (fwd.phen.sim).
  ##
  ## Arguments:
  ##   n.ind            Passed to coalescent.tree.sim (number of tips).
  ##   n.snps, n.subs, n.snps.assoc, n.subs.assoc,
  ##   heatmap, reconstruct, dist.dna.model
  ##                    Passed unchanged to fwd.snp.sim.
  ##   p                Passed to fwd.phen.sim.
  ##   seed             Passed to both coalescent.tree.sim and fwd.snp.sim.
  ##
  ## Value:
  ##   A named list: snps, snps.assoc, phen, tree.

  ## Decide whether R should pause between plots.
  ## NB: this function has no 'plot' argument. Including the name 'plot'
  ## in this test picked up the plot FUNCTION, and the comparison with TRUE
  ## then failed on every call; only heatmap and reconstruct are counted.
  ## reconstruct is counted when it is TRUE or a character value
  ## (presumably a method name handled by fwd.snp.sim -- TODO confirm).
  n.plots <- sum(c(isTRUE(heatmap),
                   isTRUE(reconstruct) || is.character(reconstruct)))
  if(n.plots > 0){
    ## ask before each new plot only if more than one is expected,
    ## and put the user's graphical settings back when we are done:
    old.par <- par(ask = (n.plots > 1))
    on.exit(par(old.par), add = TRUE)
  }

  ################################
  ## Simulate Phylogenetic Tree ##
  ################################
  tree <- coalescent.tree.sim(n.ind = n.ind, seed = seed)

  ###################
  ## Simulate SNPs ##
  ###################
  snps.list <- fwd.snp.sim(n.snps=n.snps, n.subs=n.subs,
                           n.snps.assoc=n.snps.assoc, n.subs.assoc=n.subs.assoc,
                           tree=tree,
                           heatmap=heatmap, reconstruct=reconstruct,
                           dist.dna.model=dist.dna.model,
                           seed=seed)
  snps <- snps.list$snps
  snps.assoc <- snps.list$snps.assoc

  ########################
  ## Simulate Phenotype ##
  ########################
  ## drop=FALSE keeps a matrix even when there is a single associated locus
  ## (fwd.phen.sim calls nrow() and ncol() on this argument).
  phen <- fwd.phen.sim(snps.assoc = snps[, snps.assoc, drop=FALSE],
                       p = p, tree = tree)

  ################
  ## Get Output ##
  ################
  out <- list(snps = snps, snps.assoc = snps.assoc, phen = phen, tree = tree)
  return(out)

} # end fwd.coalescent.sim
153 | 


--------------------------------------------------------------------------------
/R/fwd.phen.sim.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##################
  4 | ## fwd.phen.sim ##
  5 | ##################
  6 | 
  7 | ## TO DO ##
  8 | ## CAREFUL--phen.sim seems not to be working with trees other than those
  9 | ## produced with your coalescent.tree.sim fn (eg. rtree(100))!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 10 | 
 11 | 
 12 | ########################################################################
 13 | 
 14 | ###################
 15 | ## DOCUMENTATION ##
 16 | ###################
 17 | 
 18 | #' Simulate a phenotype, from root to tips.
 19 | #'
 20 | #' [*An exploratory function:*] Having already simulated a genotype,
 21 | #' this function allows you to simulate an associated phenotype along the tree, from root to tips.
 22 | #'
 23 | #' @param snps.assoc A matrix created by the \code{fwd.snp.sim} function,
 24 | #' which indicates where genotypic substitutions occur on the tree at phenotypically-associated sites.
 25 | #' @param p An integer specifying the probability of phenotypic substitution,
 26 | #' given genotypic substitution (see details).
 27 | #' @param tree A phylo object.
 28 | #'
 29 | #' @details The parameter \code{p} controls the simulation of the phenotype by specifying
 30 | #' the expected value of the number of phenotypic substitutions to occur on the tree provided,
 31 | #' given that a genotypic substitution has occurred on a particular branch of the tree.
 32 | #'
 33 | #'
 34 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 35 | #' @examples
 36 | #'
 37 | #' ## basic use of fn
 38 | #' tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
 39 | #'
 40 | #' ## plot output
 41 | #' plot(tree)
 42 | #'
 43 | #' @export
 44 | 
 45 | ########################################################################
 46 | 
 47 | ## TO DO: ##
 48 | ## Add arg continuous=FALSE --> continuous phen sim.
 49 | ## Implement ASR for the snps.assoc only --> get phen for internal nodes too.
 50 | 
 51 | 
 52 | ## OPTIONS: ##
 53 | ## Cumulative probability (eg. if 7/10 SNPs, 70% chance of phen)--may allow for lots of noise...
 54 | ## Threshold (eg. must have 7/10 SNPs to have phen)--will allow for lots of noise.
 55 | ## Specific combinations (eg. Must have SNPs 1&2 OR 3&4)--will be very hard for treeWAS.
 56 | ## Combination of above
 57 | 
 58 | fwd.phen.sim <- function(snps.assoc, p=1, tree=NULL){
 59 | 
 60 | 
 61 |   n.snps.assoc <- sapply(c(1:nrow(snps.assoc)),
 62 |                             function(e)
 63 |                               length(which(snps.assoc[e,] == 1)))
 64 |   ####################
 65 |   ## .get.phen.prob ##
 66 |   ####################
 67 |   .get.phen.prob <- function(n.snps.assoc, p){
 68 |     if(p == 1){
 69 |       ys <- n.snps.assoc/ncol(snps.assoc)
 70 |     }else{
 71 |       ys <- (1-p^n.snps.assoc)/(1-p^ncol(snps.assoc))
 72 |     }
 73 |     return(ys)
 74 |   } # end .get.phen.prob
 75 | 
 76 |   phen.prob <- sapply(c(1:length(n.snps.assoc)),
 77 |                       function(e)
 78 |                         .get.phen.prob(n.snps.assoc[e], p))
 79 |   phen <- as.factor(
 80 |                 sapply(c(1:length(phen.prob)),
 81 |                  function(e)
 82 |                    sample(c("A", "B"),
 83 |                           size=1,
 84 |                           replace=TRUE,
 85 |                           prob=c(phen.prob[e], 1-phen.prob[e]))))
 86 | 
 87 |   #   ###############
 88 |   #   ## HISTOGRAM ##
 89 |   #   ###############
 90 |   #   hist(.get.phen.prob(n.snps.assoc=n.snps.assoc, p=p),
 91 |   #        breaks=10, col="blue", xlim=c(0,1),
 92 |   #        main=paste("Histogram of Pr(phen)
 93 |   #                   \n p = ", p, sep=""))
 94 |   #
 95 |   #   ################
 96 |   #   ## PROB CURVE ##
 97 |   #   ################
 98 |   #   plot_prob_phen(p=p, n.snps.assoc=ncol(snps.assoc))
 99 | 
100 | 
101 |   ###############
102 |   ## w p = 0.6 ##
103 |   ###############
104 | 
105 |   ###########
106 |   ## TABLE ##
107 |   ###########
108 |   #table(phen)
109 |   #   A  B
110 |   #   95  5
111 | 
112 |   #########################
113 |   ## CORRELATION (SCORE) ##
114 |   #########################
115 |   #abs(corr.dat[snps.assoc])
116 |   #0.08 0.20 0.04 0.18 0.26 0.20 0.28 0.40 0.36 0.08
117 | 
118 | 
119 |   ###############
120 |   ## w p = 0.8 ##
121 |   ###############
122 | 
123 |   ###########
124 |   ## TABLE ##
125 |   ###########
126 |   #table(phen)
127 |   #   A  B
128 |   #   75 25
129 | 
130 |   #########################
131 |   ## CORRELATION (SCORE) ##
132 |   #########################
133 |   #abs(corr.dat[snps.assoc])
134 |   #0.16 0.08 0.08 0.14 0.10 0.12 0.36 0.24 0.32 0.08
135 | 
136 |   #############
137 |   ## w p = 1 ##
138 |   #############
139 | 
140 |   ###########
141 |   ## TABLE ##
142 |   ###########
143 |   #table(phen)
144 |   #   A  B
145 |   #   49 51
146 | 
147 |   #########################
148 |   ## CORRELATION (SCORE) ##
149 |   #########################
150 |   #abs(corr.dat[snps.assoc])
151 |   #0.04 0.04 0.12 0.02 0.02 0.08 0.00 0.04 0.08 0.12
152 | 
153 |   ###############
154 |   ## w p = 1.2 ##
155 |   ###############
156 | 
157 |   ###########
158 |   ## TABLE ##
159 |   ###########
160 |   #table(phen)
161 |   #   A  B
162 |   #   37 63
163 | 
164 |   #########################
165 |   ## CORRELATION (SCORE) ##
166 |   #########################
167 |   #abs(corr.dat[snps.assoc])
168 |   #0.08 0.00 0.12 0.10 0.06 0.04 0.04 0.28 0.16 0.04
169 | 
170 | 
171 |   return(phen)
172 | 
173 | } # end fwd.phen.sim
174 | 
175 | 
176 | 


--------------------------------------------------------------------------------
/R/fwd.plot.prob.phen.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ####################
 3 | ## plot_prob_phen ##
 4 | ####################
 5 | 
 6 | ########################################################################
 7 | 
 8 | ###################
 9 | ## DOCUMENTATION ##
10 | ###################
11 | 
12 | #' Plot the probability of association, given \code{p} and \code{n.snps.assoc}.
13 | #'
14 | #' [*For use with the 'fwd.-.sim' functions:*]
15 | #' Plot the cumulative probability of association (Pr(phen=1)), with a given value of \code{p},
16 | #' as the number of associated sites (SNPi=1) increases from i=0 to i=\code{n.snps.assoc}.
17 | #'
18 | #' @param p A numeric value indicating the probability of substitution, at each site, along the tree.
19 | #' @param n.snps.assoc An integer specifying the number of genetic loci that are associated with the phenotype.
20 | #'
21 | #'
22 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
23 | #' @examples
24 | #' \dontrun{
25 | #' ## basic use of fn ##
26 | #' ## compare probability of having phenotype with 10 SNPs at varying p:
27 | #' plot_prob_phen(p=0.8, n.snps.assoc=10)
28 | #' plot_prob_phen(p=0.5, n.snps.assoc=10)
29 | #' plot_prob_phen(p=0.2, n.snps.assoc=10)
30 | #' }
31 | #' @export
32 | 
33 | ########################################################################
34 | 
35 | 
plot_prob_phen <- function(p=0.5, n.snps.assoc=10){

  ## Plot Pr(phen) against the number of associated sites in state 1,
  ## for i = 0, 1, ..., n.snps.assoc.
  ##
  ## Args:
  ##   p:            numeric value shaping the curve (p == 1 gives a straight line).
  ##   n.snps.assoc: total number of associated sites.
  ##
  ## Returns (invisibly) the plotted probabilities, one per value of
  ## 0:n.snps.assoc.

  xs <- 0:n.snps.assoc
  if(p == 1){
    ys <- xs/n.snps.assoc
  }else{
    ## normalise by the TOTAL number of associated sites, so that the curve
    ## reaches 1 at xs == n.snps.assoc for any n.snps.assoc (not only 10):
    ys <- (1-p^xs)/(1-p^n.snps.assoc)
  }

  ## plot ##
  ## x-axis spans the full range of xs:
  plot(xs,ys,xlim=c(0,n.snps.assoc),ylim=c(0,1),
       main=paste("p = ", p, sep=""),
       xlab="Number of associated sites in state 1",
       ylab="Cumulative probability of association")

  invisible(ys)

} # end plot_prob_phen
52 | 
53 | 
54 | 
55 | 
56 | #################################
57 | ##  ENABLE ALTERNATE FN NAME:  ##
58 | #################################
59 | # plot.prob.phen <- function(p, n.snps.assoc, ...){
60 | #   return(plot_prob_phen(p, n.snps.assoc,  ...))
61 | # } # end plot.prob.phen
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 


--------------------------------------------------------------------------------
/R/heatmap.DNAbin.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | ####################
 4 | ## heatmap.DNAbin ##
 5 | ####################
 6 | 
 7 | ########################################################################
 8 | 
 9 | ###################
10 | ## DOCUMENTATION ##
11 | ###################
12 | 
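13 | #' Heatmap of pairwise genetic distances.
14 | #'
15 | #' Computes the pairwise genetic distances between the sequences in a DNAbin object
16 | #' (with \code{dist.dna}) and plots the resulting distance matrix as a heatmap,
17 | #' labelling both axes with the sequence names.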
16 | #'
17 | #' @param dna A DNAbin object.
18 | #' @param dist.dna.model A character string specifying the type of model to use in
19 | #' calculating the genetic distance between individual genomes (see ?dist.dna).
20 | #'
21 | #'
22 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
23 | #'
24 | #'
25 | #' @rawNamespace import(ape, except = zoom)
26 | #' @export
27 | 
28 | ########################################################################
29 | 
heatmap.DNAbin <- function(dna, dist.dna.model="JC69"){

  ## Plot a heatmap of the pairwise genetic distances between sequences.
  ##
  ## Args:
  ##   dna:            a DNAbin object (anything else is passed through as.DNAbin).
  ##   dist.dna.model: model name handed to dist.dna(model = ...).
  ##
  ## Called for its side effect (the plot); returns NULL invisibly.

  # require(ape)

  if(!"DNAbin" %in% class(dna)) dna <- as.DNAbin(dna)

  #############
  ## HEATMAP ##
  #############
  ## get a distance matrix between the genomes
  D <- dist.dna(dna, model = dist.dna.model)

  ## flip the columns so that the first sequence sits at the top of the y-axis:
  mat <- t(as.matrix(D))
  mat <- mat[, ncol(mat):1, drop=FALSE]

  ## widen the left/top margins for the labels; restore the caller's own
  ## margins on exit (also if plotting fails), rather than a fixed default:
  old.par <- par(mar=c(1,5,5,1))
  on.exit(par(old.par), add=TRUE)

  image(x=1:ncol(mat), y=1:ncol(mat), mat,
        col=rev(heat.colors(100)),
        xaxt="n", yaxt="n", xlab="", ylab="")

  ## Label the axes from the dimnames of the distance matrix: these are
  ## present whether dna is stored as a list or as a matrix, whereas
  ## names(dna) is NULL for a matrix.
  ## (colnames are already in reversed order, matching the flipped y-axis.)
  axis(side=2, at=c(1:ncol(mat)),
       labels=colnames(mat), las=2, cex.axis=1)
  axis(side=3, at=c(1:nrow(mat)),
       labels=rownames(mat), las=1, cex.axis=1)

  invisible(NULL)

} # end heatmap.DNAbin
56 | 


--------------------------------------------------------------------------------
/R/pair.tests.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | ################
 5 | ## pair.tests ##
 6 | ################
 7 | 
 8 | ########################################################################
 9 | 
10 | ###################
11 | ## DOCUMENTATION ##
12 | ###################
13 | 
14 | #' Pairwise tests for categorical phenotypes
15 | #'
16 | #' Internal function to calculate treeWAS 
17 | #' terminal, simultaneous, subsequent tests, 
18 | #' and chi-squared p-values for a given snp across pairs of
19 | #' phenotype levels.  
20 | #'
21 | #' @param x A contingency table (snps[,i] x phen) for score 1 (\code{terminal.test} 
22 | #'          with \code{correct.prop = TRUE}, \code{categorical = TRUE}).
23 | #' @param y A vector of values containing pairwise score 2 (\code{simultaneous.test} 
24 | #'          with \code{categorical = TRUE}) results for snps[,i].
25 | #' @param z A contingency table (snps.rec[,i] x phen.rec) for score 3 (\code{subsequent.test} 
26 | #'          with \code{correct.prop = TRUE}, \code{categorical = TRUE}).
27 | #'
28 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
29 | #' @export
30 | #' @examples
31 | #' ## Example ##
32 | #' \dontrun{
33 | #' ## basic use of fn
34 | #' out <- pair.tests(x, y, z)
35 | #' }
36 | #' 
37 | #' @importFrom stats chisq.test
38 | #'
39 | 
40 | ########################################################################
41 | 
42 | 
pair.tests <- function (x, y, z,
                        method = "bonf", digits = 3){

  ## Pairwise scores for one snp across every pair of phenotype levels.
  ##
  ## Args:
  ##   x:      contingency table (phenotype levels in rows) for score 1.
  ##   y:      vector of pairwise score 2 values, named "levA : levB"
  ##           (either order of the two levels is accepted).
  ##   z:      contingency table (same row layout as x) for score 3.
  ##   method: p-value correction method passed to p.adjust.
  ##   digits: number of significant digits retained in the output.
  ##
  ## Returns:
  ##   A data.frame with one row per pair of rows of x and the columns
  ##   phen.pair, terminal, simultaneous, subsequent, p.chisq, p.adj.chisq.

  n <- nrow(x)
  N <- n * (n - 1)/2
  df <- data.frame(phen.pair = rep("A", N), stringsAsFactors = FALSE)
  p.chisq <- rep(NA, N)
  phi <- rep(NA, N)
  phi.rec <- rep(NA, N)
  k <- 0

  ## seq_len() (rather than 1:(n-1)) so that fewer than two levels
  ## simply produces no pairs:
  for (a in seq_len(max(n - 1, 0))) {
    for (b in (a + 1):n) {
      k <- k + 1
      ## Get phen pair:
      nom.a <- as.character(rownames(x)[a])
      nom.b <- as.character(rownames(x)[b])
      mat <- matrix(c(x[a, ], x[b, ]), nrow = 2, byrow = TRUE)
      mat.rec <- matrix(c(z[a, ], z[b, ]), nrow = 2, byrow = TRUE)
      df$phen.pair[k] <- paste0(nom.a, " : ", nom.b)
      ## Calculate scores 1, 3, chisq.p values:
      x2 <- suppressWarnings(stats::chisq.test(mat, correct=FALSE))
      x2.rec <- suppressWarnings(stats::chisq.test(mat.rec, correct=FALSE))
      p.chisq[k] <- signif(x2$p.value, digits = digits)
      phi[k] <- signif(sqrt(x2$statistic/sum(mat)), digits = digits)
      phi.rec[k] <- signif(sqrt(x2.rec$statistic/sum(mat.rec)), digits = digits)
    } # end for (b) loop
  } # end for (a) loop

  ## Reorder pairwise score 2 so that each value lands on the row of df
  ## describing the same (unordered) pair of levels.
  ## An order-independent key is built for every pair, and the position of
  ## each df row is then looked up in y. Indexing y by "the df row matching
  ## each y" would apply the inverse permutation, which is only correct
  ## when the two orders differ by simple swaps.
  .pair.key <- function(noms){
    vapply(strsplit(noms, " : "),
           function(e) paste(sort(e), collapse = " : "),
           character(1))
  } # end .pair.key

  if(N > 0){
    if(is.null(names(y))){
      stop("y must be named with the phenotype pairs ('levA : levB').")
    }
    ox <- match(.pair.key(df$phen.pair), .pair.key(names(y)))
    if(anyNA(ox)){
      stop("names(y) do not cover all pairs of phenotype levels in rownames(x).")
    }
    simultaneous <- y[ox]
  }else{
    simultaneous <- y[0]
  }

  df$terminal <- phi
  df$simultaneous <- simultaneous
  df$subsequent <- phi.rec
  df$p.chisq <- p.chisq
  df$p.adj.chisq <- signif(stats::p.adjust(df$p.chisq, method = method),
                           digits = digits)
  return(df)

} # end pair.tests
89 | 
90 | 
91 | ## eg. output:
92 | # PT[[snps.sig[j]]]
93 | #         phen.pair terminal simultaneous subsequent  p.chisq p.adj.chisq
94 | # 1   chicken : cow    0.876            9      0.852 1.62e-12    4.86e-12
95 | # 2 chicken : human    0.435           -2      0.497 4.53e-04    1.36e-03
96 | # 3     cow : human    0.530            3      0.417 9.25e-06    2.77e-05
97 | 


--------------------------------------------------------------------------------
/R/phen.sim.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ##############
  4 | ## phen.sim ##
  5 | ##############
  6 | 
  7 | ## TO DO ##
  8 | ## CAREFUL--phen.sim seems not to be working with trees other than those
  9 | ## produced with your coalescent.tree.sim fn (eg. rtree(100))!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 10 | 
 11 | 
 12 | ########################################################################
 13 | 
 14 | ###################
 15 | ## DOCUMENTATION ##
 16 | ###################
 17 | 
 18 | #' Simulate a phenotype along a tree.
 19 | #'
 20 | #' Simulates a binary phenotype by placing phenotypic substitutions on the branches of a tree,
 21 | #' working from the root to the tips.
 21 | #'
 22 | #' @param tree A phylo object.
 23 | #' @param n.subs An integer controlling the phenotypic substitution rate (see details).
 24 | #' @param grp.min An optional numeric value < 0.5 specifying the minimum accepted proportion of terminal nodes
 25 | #' to be in the minor phenotypic group. It may be useful to specify a \code{grp.min} of,
 26 | #' for example, 0.2 (the default) to prevent excessive imbalance in the phenotypic group sizes. However,
 27 | #' it is important to note that (at least for the time being) \code{grp.min} values closer to
 28 | #' 0.5 are likely to cause the computational time of \code{phen.sim} to increase substantially,
 29 | #' as the function will run until acceptable group sizes are randomly generated.
 30 | #' @param seed An optional integer used to set the seed and control the pseudo-random process used in
 31 | #' \code{phen.sim}, enabling the repeatable regeneration of identical output.
 32 | #'
 33 | #' @description The parameter n.subs controls the simulation of the phenotype by specifying
 34 | #' the expected value of the number of phenotypic substitutions to occur on the tree provided.
 35 | #' The true number of phenotypic substitutions is drawn from a Poisson distribution with parameter n.subs.
 36 | #'
 37 | #'
 38 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 39 | #'
 40 | #' @examples
 41 | #'
 42 | #' ## basic use of fn
 43 | #' tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
 44 | #'
 45 | #' ## plot output
 46 | #' plot(tree)
 47 | #'
 48 | #' @importFrom phangorn midpoint
 49 | #'
 50 | #' @export
 51 | 
 52 | ########################################################################
 53 | # @useDynLib phangorn, .registration = TRUE
 54 | 
 55 | phen.sim <- function(tree,
 56 |                      n.subs = 15,
 57 |                      grp.min = 0.2,
 58 |                      # coaltree = TRUE,
 59 |                      n.subs.var=TRUE, # simulate approximately n.subs subs
 60 |                      seed = NULL){
 61 | 
 62 |   if(!is.null(seed)) set.seed(seed)
 63 | 
 64 |   ## HANDLE TREE: ##
 65 |   ## Always work with trees in "pruningwise" order:
 66 |   tree <- reorder.phylo(tree, order="pruningwise")
 67 |   ## Trees must be rooted:
 68 |   if(!is.rooted(tree)) tree <- midpoint(tree)
 69 | 
 70 |   ####################################
 71 |   ## PHENOTYPE simulation procedure ## ~ sim.by.locus...
 72 |   ####################################
 73 | 
 74 |   ## simulate phenotype for root individual:
 75 |   if(!is.null(n.subs)){
 76 |     phen.root <- "A"
 77 |   }else{
 78 |     phen.root <- NULL
 79 |   }
 80 | 
 81 |   ## store the inputted desired number of phenotypic substitutions
 82 |   n.phen.subs <- n.subs
 83 | 
 84 |   ## make dummy variables in which to store the resulting n.mts variables:
 85 |   lambda_p <- n.subs <- NA
 86 | 
 87 |   ## ensure phen variables start as NULL
 88 |   phen.branch <- phen.nodes <- phen.leaves <- NULL
 89 | 
 90 | 
 91 | 
 92 |   #############################################################
 93 |   ## If the user has specified a "mt" rate for the phenotype ##
 94 |   #############################################################
 95 | 
 96 |   ## (indicating that they want to generate a NEW phenotype for the tree provided)
 97 |   if(!is.null(n.phen.subs)){
 98 | 
 99 |     ## START WHILE LOOP HERE ###########
100 | 
101 |     toRepeat <- TRUE
102 | 
103 |     while(toRepeat == TRUE){
104 | 
105 |       ## draw the number of substitutions to occur:
106 |       ## draw an approximate n.subs given input n.subs
107 |       ## (eg to get a distribution around n.subs over multiple sims):
108 |       if(n.subs.var == TRUE){
109 |         n.subs <- rpois(n=1, lambda=n.phen.subs)
110 |         ## if n.subs==0 or ==1, re-sample
111 |         while(n.subs <= 1){
112 |           n.subs <- rpois(n=1, lambda=n.phen.subs)
113 |         }
114 | 
115 |       }else{
116 |         ## or draw exactly n.subs as input:
117 |         n.subs <- n.phen.subs
118 |       }
119 | 
120 |       ## draw the branches to which you will assign the
121 |       ## n.subs to occur for the phenotype (~ branch length):
122 |       phen.loci <- sample(c(1:length(tree$edge.length)),
123 |                           n.subs, replace=FALSE, prob=tree$edge.length)
124 |       ## rearrange phen.loci
125 |       phen.loci <- sort(phen.loci, decreasing=TRUE)
126 | 
127 | 
128 | 
129 |       ###############################
130 |       ## For Loop to get PHENOTYPE ##
131 |       ###############################
132 |       ## get phenotype for all branches/ nodes in tree
133 |       ## (from root node (ie. tree$edge[nrow(tree$edge), 1]) down):
134 |       phen.nodes <- phen.branch <- list()
135 | 
136 |       ## set phenotype for all branches and nodes to be phen.root:
137 |       phen.branch[1:length(tree$edge.length)] <- phen.root
138 |       names(phen.branch) <- paste("e", c(1:length(phen.branch)), sep=".")
139 | 
140 |       phen.nodes[1:length(unique(as.vector(unlist(tree$edge))))] <- phen.root
141 |       names(phen.nodes) <- paste("n", c(1:length(phen.nodes)), sep=".")
142 | 
143 |       #############################################################################
144 | 
145 |       #############################################################################
146 | 
147 |       ## get the node INDICES for all individuals (terminal and internal)
148 |       all.inds <- sort(unique(as.vector(unlist(tree$edge)))) # 1:(n.ind*2 - 1)
149 | 
150 |       ####################################################################
151 |       ############################
152 |       ## Get Anc-Des EDGE ORDER ##
153 |       ############################
154 |       ## Get sequence from lowest ("root", Nterm+1) to highest ancestral node:
155 |       ix <- c(min(tree$edge[,1]):max(tree$edge[,1]))
156 |       ## Get for loop index of rows in tree$edge[,1], in pairs, from lowest to highest:
157 |       x <- as.vector(unlist(sapply(c(1:length(ix)), function(e) which(tree$edge[,1] == ix[e]))))
158 |       ####################################################################
159 | 
160 | 
161 | 
162 |       ## get phen of nodes
163 |       for(i in 1:length(x)){
164 |         if(x[i] %in% phen.loci){
165 |           phen.nodes[[tree$edge[x[i],2]]] <- .switch.phen(phen.nodes[[tree$edge[x[i],1]]])
166 |         }else{
167 |           ## if no phen subs occur on branch i, set phen of
168 |           ## downstream individual to be equal to ancestor's
169 |           phen.nodes[[tree$edge[x[i],2]]] <- phen.nodes[[tree$edge[x[i], 1]]]
170 |         }
171 |       } # end for loop
172 | 
173 |       ## get phen of TERMINAL nodes (leaves)
174 |       # n.ind <- tree$Nnode+1
175 |       n.ind <- min(tree$edge[,1])-1
176 |       phen.leaves <- as.factor(as.vector(unlist(phen.nodes[c(1:n.ind)])))
177 | 
178 |       ## Assign names to phen as tree$tip.labs in original order ##
179 |       ## If checks fail, individuals in phen.leaves will be named 1:N (not ideal)
180 |       names(phen.leaves) <- c(1:length(phen.leaves))
181 |       ## Check that tip.labs are not NULL:
182 |       if(is.null(tree$tip.label)){
183 |         warning("tree$tip.label was NULL.
184 |                 Assigning individuals names 1:N. Note that these may NOT match sequence labels!")
185 |       }else{
186 |         ## Check that tip.labs is of correct length:
187 |         if(length(tree$tip.label) != length(phen.leaves)){
188 |           warning("The length of tree$tip.label did not match
189 |                   the number of terminal node phenotypes simulated.
190 |                   Assigning individuals names 1:N. Note that these may NOT match sequence labels!")
191 |         }else{
192 |           ## If checks passed, assign tip.labs to be names of phen.leaves:
193 |           names(phen.leaves) <- tree$tip.label
194 |         }
195 |       }
196 | 
197 |       ## CHECK THAT MIN GRP.SIZE >= THRESHOLD ##
198 |       if(!is.null(grp.min)){
199 |         tab <- table(phen.leaves)
200 |         grp.thresh <- (tree$Nnode+1)*grp.min
201 |         if(min(tab) < grp.thresh){
202 |           toRepeat <- TRUE
203 |         }else{
204 |           toRepeat <- FALSE
205 |         }
206 |       }else{
207 |         toRepeat <- FALSE
208 |       }
209 | 
210 |     } # end WHILE LOOP #########
211 | 
212 | 
213 |     ## get phen of branches
214 |     for(i in 1:length(x)){
215 |       ## Branches with ONE phenotype get labelled by that phenotype:
216 |       if(length(unique(phen.nodes[tree$edge[x[i],]])) == 1){
217 |         if("A" %in% phen.nodes[tree$edge[x[i],]]){
218 |           phen.branch[[x[i]]] <- "A"
219 |         }else{
220 |           phen.branch[[x[i]]] <- "B"
221 |         }
222 |       }else{
223 |         ## Branches with TWO phenotypes get labelled as such, in ORDER:
224 |         temp <- as.vector(unlist(phen.nodes[tree$edge[x[i],]]))
225 |         if(temp[1] == "A"){
226 |           phen.branch[[x[i]]] <- c("A", "B")
227 |         }else{
228 |           phen.branch[[x[i]]] <- c("B", "A")
229 |         }
230 |       }
231 |     } # end for loop
232 | 
233 |   } ## end PHEN sim procedure...
234 | 
235 |   ## convert phen.nodes to factor
236 |   phen.nodes <- as.factor(as.vector(unlist(phen.nodes)))
237 | 
238 | 
239 |   ## Assign names to phen.nodes ##
240 |   ## ... as c(tree$tip.labs, tree$node.labs) in original order:
241 |   ## If checks fail, individuals in phen.leaves will be named 1:N, node.1:node.Ninternal
242 | 
243 |   ## Assign terminal nodes names same as phen.leaves (ideally = tree$tip.label)
244 |   noms.term <- names(phen.leaves)
245 | 
246 |   ## Assign internal nodes either tree$node.label or node.1:node.N:
247 |   int.inds <- c((length(phen.leaves)+1):length(phen.nodes))
248 |   noms.int <- paste("node", int.inds, sep=".")
249 |   ## Check that node.labs are not NULL:
250 |   if(!is.null(tree$node.label)){
251 |     ## Check that node.labs is of correct length:
252 |     if(length(tree$node.label) == length(int.inds)){
253 |       noms.int <- tree$node.label
254 |     }
255 |   }
256 | 
257 |   ## Assign these names to phen.nodes:
258 |   names(phen.nodes) <- c(noms.term, noms.int)
259 | 
260 | 
261 | 
262 |   ## make output list
263 |   phen.list <- list(phen.leaves, phen.nodes, phen.branch, phen.loci)
264 |   names(phen.list) <- c("phen", "phen.nodes", "phen.edges", "phen.loci")
265 | 
266 |   return(phen.list)
267 | } # end phen.sim
268 | 


--------------------------------------------------------------------------------
/R/simultaneous.test.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | #######################
  6 | ## simultaneous.test ## ## SCORE 2 ##
  7 | #######################
  8 | 
  9 | ########################################################################
 10 | 
 11 | ###################
 12 | ## DOCUMENTATION ##
 13 | ###################
 14 | 
 15 | #' Simultaneous test
 16 | #'
 17 | #' Calculates treeWAS score 2, the simultaneous test, as the number of 
 18 | #' substitutions or changes in genotype (\code{snps.reconstruction}) and phenotype 
 19 | #' (\code{phen.reconstruction}) that occur simultaneously on the same branches of the tree. 
 20 | #'
 21 | #' @param snps.reconstruction A matrix containing the terminal and reconstructed
 22 | #' ancestral states of SNPs for all nodes in the tree.
 23 | #' @param phen.reconstruction A vector containing the terminal and reconstructed
 24 | #' ancestral states of the phenotype for all nodes in the tree.
 25 | #' @param tree A phylo object containing the tree representing the ancestral relationships
 26 | #' between the individuals for which snps and phen are known.
 27 | #'
 28 | #'
 29 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 30 | #'
 31 | #'
 32 | #' @importFrom scales rescale
 33 | #' @importFrom Hmisc all.is.numeric
 34 | #' @importFrom utils combn
 35 | #'
 36 | #' @export
 37 | 
 38 | ########################################################################
 39 | # @useDynLib phangorn, .registration = TRUE
 40 | # @importFrom phangorn midpoint
 41 | 
 42 | simultaneous.test <- function(snps.reconstruction,
 43 |                               phen.reconstruction,
 44 |                               tree,
 45 |                               categorical = FALSE){
 46 | 
 47 |   snps.rec <- snps.reconstruction
 48 |   phen.rec <- phen.reconstruction
 49 |   rm(snps.reconstruction)
 50 |   rm(phen.reconstruction)
 51 | 
 52 |   ## Always work with tree in pruningwise order:
 53 |   tree <- reorder.phylo(tree, order="pruningwise")
 54 |   ## Trees must be rooted:
 55 |   # if(!is.rooted(tree)) tree <- midpoint(tree) # require(phangorn)
 56 |   ## Get tree edges:
 57 |   edges <- tree$edge
 58 | 
 59 |   ####################################################################
 60 |   #####################
 61 |   ## Handle phen.rec ##
 62 |   #####################
 63 |   ## convert phenotype to numeric:
 64 |   phen.rec.ori <- phen.rec
 65 |   ## Convert to numeric (required for assoc tests):
 66 |   na.before <- length(which(is.na(phen.rec)))
 67 | 
 68 |   ## NB: can only be binary or continuous at this point...
 69 |   levs <- unique(as.vector(unlist(phen.rec)))
 70 |   n.levs <- length(levs[!is.na(levs)])
 71 |   if(!is.numeric(phen.rec)){
 72 |     if(all.is.numeric(phen.rec)){
 73 |       phen.rec <- as.numeric(as.character(phen.rec))
 74 |     }else{
 75 |       phen.rec <- as.numeric(as.factor(phen.rec))
 76 |       if(n.levs > 2){
 77 |         if(categorical != TRUE){
 78 |           warning("phen.rec has more than 2 levels but is not numeric. 
 79 |                   Setting 'categorical' to TRUE.")
 80 |           categorical <- TRUE
 81 |         }
 82 |       }
 83 |     }
 84 |   }
 85 |   ## ensure ind names not lost
 86 |   names(phen.rec) <- names(phen.rec.ori)
 87 | 
 88 |   ## Check that no errors occurred in conversion:
 89 |   na.after <- length(which(is.na(phen.rec)))
 90 |   if(na.after > na.before){
 91 |     stop("NAs created while converting phen.rec to numeric.")
 92 |   }
 93 |   ####################################################################
 94 | 
 95 |   ################################################
 96 |   ## RE-SCALE NON-BINARY VALUES (phen only ...) ##
 97 |   ################################################
 98 |   ## phen.rec (both Pa and Pd should be on same scale):
 99 |   if(categorical == FALSE){
100 |     phen.rec <- rescale(phen.rec, to=c(0,1)) # require(scales)
101 |   }
102 | 
103 |   ###############################
104 |   ## GET DIFFS ACROSS BRANCHES ##
105 |   ###############################
106 | 
107 |   if(categorical == FALSE){
108 |     ## ORIGINAL SCORE 2:
109 |     ## Get SNPs diffs: ##
110 |     snps.diffs <- snps.rec[edges[,1], ] - snps.rec[edges[,2], ]
111 |     
112 |     ## Get phen diffs: ##
113 |     phen.diffs <- phen.rec[edges[,1]] - phen.rec[edges[,2]]
114 |     
115 |     sp.diffs <- snps.diffs * phen.diffs
116 |     
117 |     ## Return with sign:
118 |     score2 <- colSums(sp.diffs, na.rm=TRUE)
119 |     # score2 <- abs(score2)
120 |     names(score2) <- colnames(snps.rec)
121 |     
122 |   }else{
123 |     ## CATEGORICAL SCORE 2:
124 |     
125 |     ## Get SNPs diffs: ##
126 |     snps.diffs <- snps.rec[edges[,1], ] - snps.rec[edges[,2], ]
127 |     
128 |     pairs <- t(combn(unique(phen.rec[!is.na(phen.rec)]), m=2))
129 |     S2 <- list()
130 |     for(p in 1:nrow(pairs)){
131 |       
132 |       ## Get phen diffs: ##
133 |       pr <- phen.rec
134 |       pr[which(!pr %in% pairs[p,])] <- NA
135 |       pr <- as.numeric(as.factor(as.character(pr)))-1
136 |       phen.diffs <- pr[edges[,1]] - pr[edges[,2]]
137 |       
138 |       sp.diffs <- snps.diffs * phen.diffs
139 |       S2[[p]] <- colSums(sp.diffs, na.rm=TRUE)
140 |     } # end for (p) loop
141 |     
142 |     s2 <- do.call(rbind, S2)
143 |     score2 <- colSums(abs(s2), na.rm=TRUE)
144 |     names(score2) <- colnames(snps.rec)
145 |   }
146 |   
147 |   return(score2)
148 | 
149 | } # end simultaneous.test
150 | 
151 | 
152 | 
153 | 
154 | 


--------------------------------------------------------------------------------
/R/simultaneous.test.epi.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | #######################
  6 | ## simultaneous.test ##
  7 | #######################
  8 | 
  9 | ########################################################################
 10 | 
 11 | ###################
 12 | ## DOCUMENTATION ##
 13 | ###################
 14 | 
 15 | #' Test for association between genetic loci with Score 2.
 16 | #'
 17 | #' [*\emph{A work in progress; not currently integrated into treeWAS:}*]
 18 | #' Use the simultaneous.test (Score 2) to test for associations between genetic loci, 
 19 | #' which may indicate an epistatic interaction.
 20 | #' This function can be used either to test 
 21 | #' for pairwise association between all pairs of genetic loci
 22 | #' or for associations between a subset of snps and all other snps 
 23 | #' (recommended for large datasets; see details). 
 24 | #'
 25 | #' @param snps.reconstruction A matrix containing the terminal and reconstructed
 26 | #' ancestral states of SNPs for all nodes in the tree.
 27 | #' @param tree A phylo object containing the tree representing the ancestral relationships
 28 | #' between the individuals for which snps and phen are known.
 29 | #' @param snps.subset An optional vector (see details); else, NULL. 
 30 | #' The snps.subset vector can be a character vector, containing a subset of colnames(snps.rec), 
 31 | #' a logical vector, using TRUE or FALSE to indicate which columns are to be retained and excluded,
 32 | #' or an integer vector, specifying the column indices to be retained. 
 33 | #' 
 34 | #' 
 35 | #' @details The number of pairwise tests between all pairs of snps 
 36 | #' grows rapidly as the number of snps columns increases. 
 37 | #' As such, for datasets where ncol(snps.reconstruction) is large, we recommend that
 38 | #' the snps.subset argument is used to reduce the number of tests, by
 39 | #' indicating which snps to test for association with all other snps. 
 40 | #' The snps.subset index can be used to select any subset of snps of interest. 
 41 | #' For example, one may wish to test for interactions between all snps and a subset of snps that 
 42 | #' had been deemed significantly associated with a particular phenotype in a previous run of treeWAS.
 43 | #'
 44 | #'
 45 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 46 | #'
 47 | #'
 48 | #' @importFrom scales rescale
 49 | #' @importFrom Hmisc all.is.numeric
 50 | #'
 51 | #' @export
 52 | 
 53 | ########################################################################
 54 | # @useDynLib phangorn, .registration = TRUE
 55 | # @importFrom phangorn midpoint
 56 | 
 57 | simultaneous.test.epi <- function(snps.reconstruction, # can be snps.REC OR snps.sim.REC matrix ## NOTE: subs.edges no longer required for any version of this test.
 58 |                                   tree,
 59 |                                   snps.subset=NULL){
 60 | 
 61 |   snps.rec <- snps.reconstruction
 62 |   rm(snps.reconstruction)
 63 |   
 64 | 
 65 |   ## Always work with tree in pruningwise order:
 66 |   tree <- reorder.phylo(tree, order="pruningwise")
 67 |   ## Trees must be rooted:
 68 |   # if(!is.rooted(tree)) tree <- midpoint(tree) # require(phangorn)
 69 |   ## Get tree edges:
 70 |   edges <- tree$edge
 71 | 
 72 | 
 73 |   ################################################
 74 |   ## GET SUBSET of SNPS (logical/names/indices) ##
 75 |   ################################################
 76 |   toKeep <- NULL
 77 |   if(!is.null(snps.subset)){
 78 |     if(!is.vector(snps.subset)){
 79 |       stop("snps.subset must be a vector (either a logical or numerical index vector, 
 80 |            or a vector of snps.rec column names, indicating which columns are to be kept as a subset")
 81 |     }else{
 82 |       ## LOGICAL (snps.subset = T/F toKeep) ##
 83 |       if(is.logical(snps.subset)){
 84 |         toKeep <- which(snps.subset == TRUE)
 85 |       }else{
 86 |         ## NUMERIC (snps.subset = indices toKeep) ##
 87 |         if(is.numeric(snps.subset)){
 88 |           if(!all(snps.subset %in% c(1:ncol(snps.rec)))){
 89 |             stop("not all snps.subset correspond to indices in 1:ncol(snps.rec)")
 90 |           }else{
 91 |             toKeep <- snps.subset
 92 |           }
 93 |         }else{
 94 |           ## CHARACTER (snps.subset = colnames toKeep) ##
 95 |         if(!all(snps.subset %in% colnames(snps.rec))){
 96 |           stop("not all snps.subset are in colnames(snps.rec)")
 97 |         }else{
 98 |           toKeep <- which(colnames(snps.rec) %in% snps.subset)
 99 |         } 
100 |         }
101 |       }
102 |       # snps.rec <- snps.rec[,toKeep]
103 |       # toKeep <- which(colnames(snps.rec) %in% snps.subset) # where (snps.subset = sig.snps.names)
104 |     }
105 |   }
106 | 
107 |   ####################################################################
108 | 
109 |   ###############################
110 |   ## GET DIFFS ACROSS BRANCHES ##
111 |   ###############################
112 | 
113 |   ## Get SNPs diffs: ##
114 |   snps.diffs <- snps.rec[edges[,1], ] - snps.rec[edges[,2], ]
115 | 
116 |   ## Get snp1:snp2 diffs: ##
117 |   s1s2.diffs <- SCORE2 <- list()
118 | 
119 |   ## If no snps.subset, run test over all columns...
120 |   if(is.null(toKeep)) toKeep <- 1:ncol(snps.diffs)
121 |   for(i in 1:length(toKeep)){
122 |     s1s2.diffs[[i]] <- snps.diffs[,toKeep[i]] * snps.diffs
123 |     ## Return with sign:
124 |     SCORE2[[i]] <- colSums(s1s2.diffs[[i]], na.rm=TRUE)
125 |     # SCORE2 <- abs(SCORE2)
126 |     names(SCORE2[[i]]) <- paste(colnames(snps.rec)[toKeep[i]], colnames(snps.rec), sep="/")
127 |   } # end for loop
128 | 
129 |   #######################
130 |   score2 <- unlist(SCORE2)
131 |   #######################
132 |   noms <-  strsplit(names(score2), "/")
133 |   str(noms)
134 |   mat <- rep(NA, length(noms))
135 |   mat <- cbind(mat, mat)
136 |   for(i in 1:length(noms)){
137 |     if(length(noms[[i]]) > 2){
138 |       x <- noms[[i]][1]
139 |       x[2] <- paste(noms[[i]][2:length(noms[[i]])], collapse="/")
140 |       noms[[i]] <- x
141 |     }
142 |     mat[i,] <- noms[[i]]
143 |   }  # end for loop
144 |   noms <- do.call(rbind, noms)
145 |   # str(noms)
146 |   #######################
147 |   # noms.ori <- names(score2)
148 |   attr(score2, "snps1") <- noms[,1]
149 |   attr(score2, "snps2") <- noms[,2]
150 |   # names(score2) ## still there, just not visible w str(score2)
151 |   #######################
152 | 
153 |   return(score2)
154 | 
155 | } # end simultaneous.test.epi
156 | ####################################################################################
157 | ####################################################################################
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | #
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 


--------------------------------------------------------------------------------
/R/subsequent.test.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #####################
  3 | ## subsequent.test ## ## SCORE 3 ##
  4 | #####################
  5 | 
  6 | ########################################################################
  7 | 
  8 | ###################
  9 | ## DOCUMENTATION ##
 10 | ###################
 11 | 
 12 | #' Subsequent test
 13 | #'
 14 | #' Calculates treeWAS score 3, the subsequent test.
 15 | #'
 16 | #' @param tree A phylo object.
 17 | #'
 18 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 19 | #'
 20 | #' @examples
 21 | #'
 22 | #' ## basic use of fn
 23 | #' tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
 24 | #'
 25 | #' @importFrom scales rescale
 26 | #' @importFrom Hmisc all.is.numeric
 27 | #' @export
 28 | 
 29 | ########################################################################
 30 | # @useDynLib phangorn, .registration = TRUE
 31 | # @importFrom phangorn midpoint
 32 | 
 33 | subsequent.test <- function(snps.reconstruction,
 34 |                             phen.reconstruction,
 35 |                             tree,
 36 |                             correct.prop = FALSE,
 37 |                             categorical = FALSE){
 38 | 
 39 |   snps.rec <- snps.reconstruction
 40 |   phen.rec <- phen.reconstruction
 41 |   rm(snps.reconstruction)
 42 |   rm(phen.reconstruction)
 43 | 
 44 |   ## Always work with tree in pruningwise order:
 45 |   tree <- reorder.phylo(tree, order="pruningwise")
 46 |   ## Trees must be rooted:
 47 |   # if(!is.rooted(tree)) tree <- midpoint(tree) # require(phangorn)
 48 |   ## get tree edges:
 49 |   edges <- tree$edge
 50 | 
 51 |   ####################################################################
 52 |   #####################
 53 |   ## Handle phen.rec ##
 54 |   #####################
 55 |   ## convert phenotype to numeric:
 56 |   phen.rec.ori <- phen.rec
 57 |   ## Convert to numeric (required for assoc tests):
 58 |   na.before <- length(which(is.na(phen.rec)))
 59 | 
 60 |   ## NB: can only be binary or continuous at this point...
 61 |   levs <- unique(as.vector(unlist(phen.rec)))
 62 |   n.levs <- length(levs[!is.na(levs)])
 63 |   if(!is.numeric(phen.rec)){
 64 |     if(all.is.numeric(phen.rec)){
 65 |       phen.rec <- as.numeric(as.character(phen.rec))
 66 |     }else{
 67 |       phen.rec <- as.numeric(as.factor(phen.rec))
 68 |       if(n.levs > 2){
 69 |         if(categorical != TRUE){
 70 |           warning("phen.rec has more than 2 levels but is not numeric.
 71 |                   Setting 'categorical' to TRUE.")
 72 |           categorical <- TRUE
 73 |         }
 74 |       }
 75 |     }
 76 |   }
 77 |   ## ensure ind names not lost
 78 |   names(phen.rec) <- names(phen.rec.ori)
 79 | 
 80 |   ## Check that no errors occurred in conversion:
 81 |   na.after <- length(which(is.na(phen.rec)))
 82 |   if(na.after > na.before){
 83 |     stop("NAs created while converting phen.rec to numeric.")
 84 |   }
 85 |   ####################################################################
 86 | 
 87 |   ################################################
 88 |   ## RE-SCALE NON-BINARY VALUES (phen only ...) ##
 89 |   ################################################
 90 |   ## phen.rec (both Pa and Pd should be on same scale):
 91 |   if(categorical == FALSE){
 92 |     phen.rec <- rescale(phen.rec, to=c(0,1)) # require(scales)
 93 |   }
 94 | 
 95 |   ###############################
 96 |   ## GET SCORE ACROSS BRANCHES ##
 97 |   ###############################
 98 | 
 99 |   ## Get snps, phen values for all internal+terminal nodes:
100 |   Sx <- snps.rec
101 |   Px <- phen.rec
102 | 
103 |   #################################################################     #####
104 |   ###############
105 |   ## SCORE 3.0 ##
106 |   ###############
107 |   if(categorical == FALSE){
108 |     if(correct.prop == FALSE){
109 |       ## Get snps, phen values for ancestral & descendant nodes:
110 |       Pa <- phen.rec[edges[,1]]
111 |       Pd <- phen.rec[edges[,2]]
112 |       Sa <- snps.rec[edges[,1], ]
113 |       Sd <- snps.rec[edges[,2], ]
114 |       bl <- tree$edge.length
115 | 
116 |       ## ORIGINAL INTEGRAL-BASED SCORE3 (without edge length):
117 |       score3 <- get.score3(Pa = Pa, Pd = Pd, Sa = Sa, Sd = Sd, l = NULL)
118 | 
119 |       ## Return with sign:
120 |       score3 <- colSums(score3, na.rm=TRUE)
121 |     }else{
122 |       ## MARGINAL-CORRECTED SCORE 1 (Phi):
123 |       score3 <- ((colSums((1 - Px)*(1 - Sx), na.rm=TRUE)*colSums(Px*Sx, na.rm=TRUE)) -
124 |                    (colSums((1 - Px)*Sx, na.rm=TRUE)*colSums(Px*(1 - Sx), na.rm=TRUE))) /
125 |         (sqrt(colSums(1 - Sx, na.rm=TRUE)*colSums(Sx, na.rm=TRUE)*sum((1 - Px), na.rm=TRUE)*sum(Px, na.rm=TRUE)))
126 |     }
127 |   }else{
128 |     ## CATEGORICAL SCORE 3 (Phi):
129 |     score3 <- suppressWarnings(sqrt(sapply(c(1:ncol(Sx)), function(e)
130 |       chisq.test(x=Px, y=Sx[,e], correct=F)$statistic)/length(Px)))
131 |   }
132 | 
133 |   # score3 <- abs(score3)
134 |   names(score3) <- colnames(snps.rec)
135 | 
136 |   return(score3)
137 | 
138 | } # end subsequent.test
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | ################
148 | ## get.score3 ##
149 | ################
150 | 
151 | ########################################################################
152 | 
153 | ###################
154 | ## DOCUMENTATION ##
155 | ###################
156 | 
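157 | #' Compute the integral-based terms of treeWAS score 3.
158 | #'
159 | #' Evaluates the integral-based score 3 expression from the phenotype and SNP values at the ancestral and descendant nodes of each branch. If \code{l} is provided, each term is weighted by \code{l/sum(l)}; otherwise each term is divided by \code{length(Pa)}.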
160 | #'
161 | #' @param Pa A numeric value containing either the state,
162 | #' or the probability of the state, of the phenotype at a given \emph{ancestral} node.
163 | #' @param Pd A numeric value containing either the state,
164 | #' or the probability of the state, of the phenotype at a given \emph{descendant} node.
165 | #' @param Sa A numeric value containing either the state,
166 | #' or the probability of the state, of SNPi at a given \emph{ancestral} node.
167 | #' @param Sd A numeric value containing either the state,
168 | #' or the probability of the state, of SNPi at a given \emph{descendant} node.
169 | #' @param l A numeric value specifying the length of the branch in the phylogenetic tree
170 | #' that joins the ancestral and descendant node.
171 | #'
172 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
173 | #' @export
174 | #' @examples
175 | #' ## Example ##
176 | #' \dontrun{
177 | #' ## basic use of fn
178 | #' tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
179 | #' }
180 | 
181 | ########################################################################
182 | 
get.score3 <- function(Pa, Pd, Sa, Sd, l=NULL){

  ## Integral-based score 3 term(s).
  ##
  ## Args:
  ##   Pa, Pd: phenotype value at the ancestral / descendant node.
  ##   Sa, Sd: SNP value at the ancestral / descendant node.
  ##   l: optional branch length(s); NULL to ignore branch lengths.
  ##
  ## Returns:
  ##   The score term(s): weighted by l/sum(l) when l is given,
  ##   otherwise divided by length(Pa).

  ## Expression shared by both versions of the score:
  branch.term <- ((4/3)*Pa*Sa) +
    ((2/3)*Pa*Sd) +
    ((2/3)*Pd*Sa) +
    ((4/3)*Pd*Sd) -
    Pa -
    Pd -
    Sa -
    Sd +
    1

  ## WITHOUT edge-length: average over the number of phenotype values.
  if(is.null(l)){
    return(branch.term/length(Pa))
  }

  ## WITH edge-length: weight by each branch's share of the total length.
  return((l*branch.term)/sum(l))

} # end get.score3
214 | 
215 | 
216 | 
217 | ###########################################################################################
218 | 
219 | ###########################################################################################
220 | 
221 | 
222 | 
223 | 
224 | 
225 | 
226 | 
227 | 
228 | 
229 | 
230 | 
231 | 
232 | 
233 | 
234 | #
235 | 


--------------------------------------------------------------------------------
/R/terminal.test.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | ###################
  5 | ## terminal.test ## ## SCORE 1 ##
  6 | ###################
  7 | 
  8 | ########################################################################
  9 | 
 10 | ###################
 11 | ## DOCUMENTATION ##
 12 | ###################
 13 | 
 14 | #' Terminal test
 15 | #'
 16 | #' Calculates treeWAS score 1, the terminal test.
 17 | #'
 18 | #' @param tree A phylo object.
 19 | #'
 20 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 21 | #' @export
 22 | #' @examples
 23 | #' ## Example ##
 24 | #' \dontrun{
 25 | #' ## basic use of fn
 26 | #' out <- terminal.test(snps, phen)
 27 | #' }
 28 | #'
 29 | #' @importFrom scales rescale
 30 | #' @importFrom Hmisc all.is.numeric
 31 | #'
 32 | 
 33 | ########################################################################
 34 | 
 35 | 
 36 | terminal.test <- function(snps,
 37 |                           phen,
 38 |                           correct.prop = FALSE,
 39 |                           categorical = FALSE){
 40 | 
 41 |   ####################################################################
 42 |   #################
 43 |   ## Handle phen ##
 44 |   #################
 45 |   ## convert phenotype to numeric:
 46 |   phen.ori <- phen
 47 |   ## Convert to numeric (required for assoc tests):
 48 |   na.before <- length(which(is.na(phen)))
 49 | 
 50 |   ## NB: can only be binary or continuous at this point...
 51 |   levs <- unique(as.vector(unlist(phen)))
 52 |   n.levs <- length(levs[!is.na(levs)])
 53 |   if(!is.numeric(phen)){
 54 |     if(all.is.numeric(phen)){
 55 |       phen <- as.numeric(as.character(phen))
 56 |     }else{
 57 |       phen <- as.numeric(as.factor(phen))
 58 |       if(n.levs > 2){
 59 |         if(categorical != TRUE){
 60 |           warning("phen has more than 2 levels but is not numeric.
 61 |                   Setting 'categorical' to TRUE.")
 62 |           categorical <- TRUE
 63 |         }
 64 |     }
 65 |     }
 66 |   }
 67 |   ## ensure ind names not lost
 68 |   names(phen) <- names(phen.ori)
 69 | 
 70 |   ## Check that no errors occurred in conversion:
 71 |   na.after <- length(which(is.na(phen)))
 72 |   if(na.after > na.before){
 73 |     stop("NAs created while converting phen to numeric.")
 74 |   }
 75 |   ####################################################################
 76 | 
 77 |   ##################################
 78 |   ## GET SCORE 1 @ TERMINAL NODES ##
 79 |   ##################################
 80 | 
 81 |   Pd <- phen # .rec[edges[,2]]
 82 |   Sd <- snps # .rec[edges[,2], ]
 83 | 
 84 |   ################################################
 85 |   ## RE-SCALE NON-BINARY VALUES (phen only (?)) ##
 86 |   ################################################
 87 |   Pd.ori <- Pd
 88 |   if(categorical == FALSE){
 89 |     Pd <- rescale(Pd, to=c(0,1))  ## require(scales)
 90 |   }
 91 | 
 92 |   #################################################################     #####
 93 |   #############
 94 |   ## SCORE 1 ##
 95 |   #############
 96 |   if(categorical == FALSE){
 97 |     if(correct.prop == FALSE){
 98 |       ## ORIGINAL TERMINAL SCORE 1:
 99 |       score1 <- (Pd*Sd - (1 - Pd)*Sd - Pd*(1 - Sd) + (1 - Pd)*(1 - Sd))  ## CALCULATE SCORE 1 EQUATION
100 | 
101 |       ## Return with sign:
102 |       score1 <- colSums(score1, na.rm=TRUE)/length(Pd)
103 |     }else{
104 |       ## MARGINAL-CORRECTED SCORE 1 (Phi):
105 |       score1 <- ((colSums((1 - Pd)*(1 - Sd), na.rm=TRUE)*colSums(Pd*Sd, na.rm=TRUE)) -
106 |                    (colSums((1 - Pd)*Sd, na.rm=TRUE)*colSums(Pd*(1 - Sd), na.rm=TRUE))) /
107 |         (sqrt(colSums(1 - Sd, na.rm=TRUE)*colSums(Sd, na.rm=TRUE)*sum((1 - Pd), na.rm=TRUE)*sum(Pd, na.rm=TRUE)))
108 |     }
109 |   }else{
110 |     ## CATEGORICAL SCORE 1 (Phi):
111 |     score1 <- suppressWarnings(sqrt(sapply(c(1:ncol(Sd)), function(e)
112 |       chisq.test(x=Pd, y=Sd[,e], correct=F)$statistic)/length(Pd)))
113 |   }
114 | 
115 |   # score1 <- abs(score1)
116 |   names(score1) <- colnames(snps)
117 | 
118 |   return(score1)
119 | 
120 | } # end terminal.test
121 | 


--------------------------------------------------------------------------------
/R/terminal.test.epi.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | #######################
  5 | ## terminal.test.epi ##
  6 | #######################
  7 | 
  8 | 
  9 | ########################################################################
 10 | 
 11 | ###################
 12 | ## DOCUMENTATION ##
 13 | ###################
 14 | 
 15 | #' Test for epistasis between genetic loci with Score 1.
 16 | #'
 17 | #' [*\emph{A work in progress; not currently integrated into treeWAS:}*]
 18 | #' Use the terminal.test (Score 1) to test for associations between genetic loci, 
 19 | #' which may indicate an epistatic interaction.
 20 | #' This function can be used either to test 
 21 | #' for pairwise association between all pairs of genetic loci
 22 | #' or for associations between a subset of snps and all other snps 
 23 | #' (recommended for large datasets; see details). 
 24 | #'
 25 | #' @param snps A matrix containing the states of SNPs (in columns) for all individuals (in rows).
 26 | #' @param snps.subset An optional vector (see details); else, NULL. 
 27 | #' The snps.subset vector can be a character vector, containing a subset of colnames(snps.rec), 
 28 | #' a logical vector, using TRUE or FALSE to indicate which columns are to be retained and excluded,
 29 | #' or an integer vector, specifying the column indices to be retained. 
 30 | #' 
 31 | #' 
 32 | #' @details The number of pairwise tests between all pairs of snps 
 33 | #' grows rapidly as the number of snps columns increases. 
 34 | #' As such, for datasets where ncol(snps.reconstruction) is large, we recommend that
 35 | #' the snps.subset argument is used to reduce the number of tests, by
 36 | #' indicating which snps to test for association with all other snps. 
 37 | #' The snps.subset index can be used to select any subset of snps of interest. 
 38 | #' For example, one may wish to test for interactions between all snps and a subset of snps that 
 39 | #' had been deemed significantly associated with a particular phenotype in a previous run of treeWAS.
 40 | #' 
 41 | #' 
 42 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 43 | #'
 44 | #'
 45 | #' @importFrom scales rescale
 46 | #' @importFrom Hmisc all.is.numeric
 47 | #'
 48 | #' @export
 49 | 
 50 | ########################################################################
 51 | 
 52 | 
 53 | terminal.test.epi <- function(snps,
 54 |                               snps.subset=NULL){
 55 | 
 56 |   ################################################
 57 |   ## GET SUBSET of SNPS (logical/names/indices) ##
 58 |   ################################################
 59 |   toKeep <- NULL
 60 |   if(!is.null(snps.subset)){
 61 |     # if(length(snps.subset) != ncol(snps)){
 62 |     #   stop("snps.subset must be of length ncol(snps).")
 63 |     # }else{
 64 |     if(is.logical(snps.subset)){
 65 |       toKeep <- snps.subset
 66 |     }else{
 67 |       if(all(snps.subset %in% colnames(snps))) toKeep <- which(colnames(snps) %in% snps.subset)
 68 |     }
 69 |     # snps.rec <- snps[,toKeep]
 70 |     toKeep <- which(colnames(snps) %in% snps.subset) # where (snps.subset = sig.snps.names)
 71 |     # }
 72 |   }
 73 | 
 74 |   ################################################
 75 |   ## RE-SCALE NON-BINARY VALUES (phen only (?)) ##
 76 |   ################################################
 77 |   # Pd <- phen
 78 |   # Pd.ori <- Pd
 79 |   # # if(n.levs > 2)
 80 |   # Pd <- rescale(Pd, to=c(0,1))  ## require(scales)
 81 | 
 82 |   #################################################################     #####
 83 |   #############
 84 |   ## SCORE 1 ##
 85 |   #############
 86 |   #######################
 87 |   ## ORIGINAL TERMINAL SCORE 1:
 88 |   # score1 <- (Pd*Sd - (1 - Pd)*Sd - Pd*(1 - Sd) + (1 - Pd)*(1 - Sd))  ## CALCULATE SCORE 1 EQUATION
 89 |   #######################
 90 | 
 91 |   ## Get snp1:snp2 diffs: ##
 92 |   s1s2 <- SCORE1 <- list()
 93 |   Sd <- snps
 94 |   ## If no snps.subset, run test over all columns...
 95 |   if(is.null(toKeep)) toKeep <- 1:ncol(snps)
 96 |   for(i in 1:length(toKeep)){
 97 |     Pd <- snps[,toKeep[i]]
 98 |     s1s2[[i]] <- (Pd*Sd - (1 - Pd)*Sd - Pd*(1 - Sd) + (1 - Pd)*(1 - Sd))  ## CALCULATE SCORE 1 EQUATION
 99 | 
100 |     ## Return with sign:
101 |     SCORE1[[i]] <- colSums(s1s2[[i]], na.rm=TRUE)/length(Pd)
102 |     # SCORE1 <- abs(SCORE1)
103 |     names(SCORE1[[i]]) <- paste(colnames(snps)[toKeep[i]], colnames(snps), sep="/")
104 |   } # end for loop
105 | 
106 |   score1 <- unlist(SCORE1)
107 |   #######################
108 | 
109 |   # names(score1) <- colnames(snps)
110 |   #######################
111 |   noms <-  strsplit(names(score1), "/")
112 |   # str(noms)
113 |   mat <- rep(NA, length(noms))
114 |   mat <- cbind(mat, mat)
115 |   for(i in 1:length(noms)){
116 |     if(length(noms[[i]]) > 2){
117 |       x <- noms[[i]][1]
118 |       x[2] <- paste(noms[[i]][2:length(noms[[i]])], collapse="/")
119 |       noms[[i]] <- x
120 |     }
121 |     mat[i,] <- noms[[i]]
122 |   }  # end for loop
123 |   noms <- do.call(rbind, noms)
124 |   # str(noms)
125 |   #######################
126 |   # noms.ori <- names(score1)
127 |   attr(score1, "snps1") <- noms[,1]
128 |   attr(score1, "snps2") <- noms[,2]
129 |   # names(score1) ## still there, just not visible w str(score1)
130 |   #######################
131 | 
132 | 
133 |   return(score1)
134 | 
135 | } # end terminal.test.epi
136 | 


--------------------------------------------------------------------------------
/R/tree.reconstruct.R:
--------------------------------------------------------------------------------
  1 | 
  2 | ######################
  3 | ## tree.reconstruct ##
  4 | ######################
  5 | 
  6 | 
  7 | ########################################################################
  8 | 
  9 | ###################
 10 | ## DOCUMENTATION ##
 11 | ###################
 12 | 
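 13 | #' Reconstruct a phylogenetic tree from genetic data.
 14 | #'
 15 | #' Builds a phylogenetic tree from a DNAbin object or matrix, using neighbour-joining (NJ or BIONJ, or NJ* or BIONJ* if the distance matrix contains NAs) or parsimony. Trees that are not rooted are midpoint-rooted.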
 16 | #'
 17 | #' @param dna A matrix or DNAbin object containing genomes for (only)
 18 | #'                the terminal nodes of the tree to be reconstructed.
 19 | #'                Individuals should be in the rows and loci in the columns; rows and columns should be labelled.
 20 | #' @param method A character string specifying the method of phylogenetic reconstruction:
 21 | #'                one of \code{"NJ"}, \code{"BIONJ"} (the default), or \code{"parsimony"};
 22 | #'                or, if NAs are present in the distance matrix, one of: \code{"NJ*"} or \code{"BIONJ*"}.
 23 | #' @param dist.dna.model A character string specifying the type of model to use in
 24 | #'                          calculating the genetic distance between individual genomes (see ?dist.dna).
 25 | #' @param plot A logical specifying whether to plot the reconstructed phylogenetic tree.
 26 | #'
 27 | #'
 28 | #'
 29 | #' @author Caitlin Collins \email{caitiecollins@@gmail.com}
 30 | #'
 31 | #'
 32 | #'
 33 | #' @rawNamespace import(ape, except = zoom)
 34 | #' @importFrom phangorn as.phyDat
 35 | #' @importFrom phangorn midpoint
 36 | #' @importFrom phangorn pratchet
 37 | #' @importFrom phangorn acctran
 38 | #'
 39 | #'
 40 | #' @export
 41 | 
 42 | ########################################################################
 43 | # @import phangorn
 44 | # @useDynLib phangorn as.phyDat midpoint optim.pml pml, .registration = TRUE
 45 | # @useDynLib phangorn, .registration = TRUE
 46 | # @importFrom phangorn optim.pml
 47 | # @importFrom phangorn pml
 48 | # @importFrom phangorn optim.parsimony
 49 | 
 50 | ############
 51 | ## TO DO: ##
 52 | ############
 53 | ## add all the options from hclust (stats) as available methods..?
 54 | ## change all methods to either upper or lower case (or add to lower check).
 55 | 
 56 | 
 57 | tree.reconstruct <- function(dna,
 58 |                              method= c("BIONJ", "NJ", "parsimony", "BIONJ*", "NJ*"),
 59 |                              dist.dna.model="JC69",
 60 |                              plot=TRUE){
 61 | 
 62 |   ###################
 63 |   ## LOAD PACKAGES ##
 64 |   ###################
 65 |   # require(ape)
 66 |   # require(phangorn)
 67 | 
 68 |   ############
 69 |   ## CHECKS ##
 70 |   ############
 71 |   ## DNA ##
 72 |   if(class(dna)[1] != "DNAbin"){
 73 |     # if(class(dna) == "genind"){
 74 |     #   dna <- dna@tab ## might be problems w ploidy...
 75 |     # }
 76 |     if(is.matrix(dna)){
 77 |       # dna <- as.DNAbin(dna)
 78 |       sp <- matrix(as.character(dna), nrow=nrow(dna), ncol=ncol(dna))
 79 |       rownames(sp) <- rownames(dna)
 80 |       colnames(sp) <- colnames(dna)
 81 | 
 82 |       ## Check/convert levels:
 83 |       levs <- unique(as.vector(unlist(sp)))
 84 |       nts <- c("a", "c", "g", "t")
 85 |       if(length(levs[!is.na(levs)]) > 4){
 86 |         stop("There must be no more than 4 unique values in dna, excluding NAs.")
 87 |       }
 88 |       if(!all(levs %in% nts)){
 89 |         for(i in 1:length(levs)){
 90 |           sp <- replace(sp, which(sp == levs[i]), nts[i])
 91 |         } # end for loop
 92 |       } # end levs conversion
 93 | 
 94 |       dna <- as.DNAbin(sp)
 95 |       rownames(dna) <- rownames(sp)
 96 |       colnames(dna) <- colnames(sp)
 97 |     }else{
 98 |       stop("dna should be of class DNAbin or matrix")
 99 |     }
100 |   }
101 |   ## TREE REC METHOD ##
102 |   method <- tolower(method)
103 |   if(method == "njs") method <- "nj*"
104 |   if(method == "bionjs") method <- "bionj*"
105 |   method <- match.arg(arg = method,
106 |                       choices = c("bionj", "nj", "parsimony", "nj*", "bionj*"),
107 |                       several.ok = FALSE)
108 |   if(!any(c("nj", "bionj", "parsimony", "nj*", "bionj*") %in% method)){
109 |     warning("method should be one of  'nj', 'bionj', 'parsimony', 'nj*', 'bionj*'. Choosing 'BIONJ'.")
110 |     method <- "bionj"
111 |   }else{
112 |     ## use first arg if more than 1 present:
113 |     if(length(method) > 1){
114 |       method <- method[1]
115 |     }
116 |   }
117 | 
118 |   # if(method == "upgma"){
119 |   #   warning("UPGMA enforces ultrametricity, which can bias treeWAS results.
120 |   #           NJ or BIONJ may give more reliable results.")
121 |   # }
122 | 
123 |   tree <- NULL
124 | 
125 | 
126 |   ##########################
127 |   ## Get distance matrix: ##
128 |   ##########################
129 |   D <- dist.dna(dna, model = dist.dna.model)
130 | 
131 |   ## Handle MISSING data:
132 |   ## NOTE: hclust not able to handle NAs/NaNs in D..
133 |   ## NB: NAs are ok in dna, but NAs in D arise when dist.dna cannot find a dist btw. any 2 individuals,
134 |   ## e.g., there is an NA at all loci in at least one of the 2 inds.
135 |   ## --> Use phylo methods that can handle NAs in D (eg. NJ* and BIONJ*, from ape).
136 |   if(any(is.na(D))){
137 |     if(!method %in% c("nj*", "bionj*")){
138 |       if(method == "nj"){
139 |         method <- "nj*"
140 |       }else{
141 |         method <- "bionj*"
142 |       }
143 |       cat("NAs in distance matrix. Replacing method of phylo estimation with ", method, ".", sep="")
144 |     }
145 |   }
146 | 
147 |   ###################################
148 |   ## Methods with NO missing data: ##     #####     #####     #####     #####     #####     #####     #####     #####     #####
149 |   ###################################
150 | 
151 |   ########
152 |   ## NJ ##
153 |   ########
154 |   if(method=="nj"){
155 |     tree <- nj(D)
156 |     #tree <- midpoint(ladderize(tree))
157 |     ## Always work with tree in pruningwise order:
158 |     tree <- reorder.phylo(tree, order="pruningwise")
159 |     ## Trees must be rooted:
160 |     if(!is.rooted(tree)) tree <- midpoint(tree)
161 |     if(plot==TRUE){
162 |       plot(tree, edge.width=2, cex=0.5)
163 |       title("Neighbour-joining tree")
164 |       axisPhylo()
165 |     }
166 |   }
167 |   ###########
168 |   ## BIONJ ##
169 |   ###########
170 |   if(method=="bionj"){
171 |     tree <- bionj(D)
172 |     #tree <- midpoint(ladderize(tree))
173 |     ## Always work with tree in pruningwise order:
174 |     tree <- reorder.phylo(tree, order="pruningwise")
175 |     ## Trees must be rooted:
176 |     if(!is.rooted(tree)) tree <- midpoint(tree)
177 |     if(plot==TRUE){
178 |       plot(tree, edge.width=2, cex=0.5)
179 |       title("BIONJ tree")
180 |       axisPhylo()
181 |     }
182 |   }
183 | 
184 |   ###########
185 |   ## UPGMA ##
186 |   ###########
187 |   # if(method=="upgma"){
188 |   #   tree <- hclust(D, method="average")
189 |   #   tree <- as.phylo(tree)
190 |   #   #tree <- midpoint(ladderize(tree))
191 |   #   ## Always work with tree in pruningwise order:
192 |   #   tree <- reorder.phylo(tree, order="pruningwise")
193 |   #   ## Trees must be rooted:
194 |   #   if(!is.rooted(tree)) tree <- midpoint(tree)
195 |   #   if(plot==TRUE){
196 |   #     plot(tree, edge.width=2, cex=0.5)
197 |   #     title("UPGMA tree")
198 |   #   }
199 |   # }
200 | 
201 |   ###############
202 |   ## parsimony ## ## a bit slow if many unique columns
203 |   ###############
204 |   if(method=="parsimony"){
205 |     ## as.phyDat warns if NAs present (& doesn't include these...)
206 |     dna4 <- suppressWarnings(as.phyDat(dna))
207 |     ## get pars tree:
208 |     # tre.ini <- nj(D)
209 |     # tree <- optim.parsimony(tre.ini, dna4)
210 |     tre.ini <- pratchet(dna4, trace=0) # better (can also return set of treeS)
211 |                         # , maxit=400, k=8)
212 |     ## add edge lengths w ACCTRAN:
213 |     tree <- acctran(tre.ini, dna4) # edge lengths in n.subs (but relative lengths still fine).
214 |     ## Always work with tree in pruningwise order:
215 |     tree <- reorder.phylo(tree, order="pruningwise")
216 |     ## Convert edge.lengths from parsimony cost to n.subs-per-site
217 |     ## (s.t. parsimony lengths ~ lengths via NJ or UPGMA):
218 |     tree$edge.length <- tree$edge.length/ncol(dna)
219 |     ## Trees must be rooted:
220 |     if(!is.rooted(tree)) tree <- midpoint(tree)
221 |     if(plot==TRUE){
222 |       plot(tree, edge.width=2, cex=0.5)
223 |       title("Parsimony tree")
224 |       axisPhylo()
225 |     }
226 |   }
227 | 
228 |   ########
229 |   ## ML ##  ## discontinued: too slow
230 |   ########
231 |   # if(method=="ml"){
232 |   #   dna4 <- suppressWarnings(as.phyDat(dna))
233 |   #   tre.ini <- nj(D)
234 |   #   fit.ini <- pml(tre.ini, dna4, k=nrow(dna))
235 |   #   fit <- optim.pml(fit.ini, optNni = TRUE, optBf = TRUE,
236 |   #                    optQ = TRUE, optGamma = TRUE)
237 |   #
238 |   #   ## NOTE--you may want to store these in a results.ml list
239 |   #   ## and return it with your results instead of printing
240 |   #   ## OR at least print a message
241 |   #   ## (eg. "Printing maximum-likelihood calculations...")
242 |   #   ## before printing these numbers...
243 |   #
244 |   #   #     anova(fit.ini, fit)
245 |   #   #     AIC(fit.ini)
246 |   #   #     AIC(fit)
247 |   #
248 |   #   tree <- fit$tree
249 |   #   #tree <- midpoint(ladderize(tree))
250 |   #   ## Always work with tree in pruningwise order:
251 |   #   tree <- reorder.phylo(tree, order="pruningwise")
252 |   #   ## Trees must be rooted:
253 |   #   if(!is.rooted(tree)) tree <- midpoint(tree)
254 |   #   if(plot==TRUE){
255 |   #     plot(tree, show.tip=TRUE, edge.width=2)
256 |   #     title("Maximum-likelihood tree")
257 |   #     axisPhylo()
258 |   #   }
259 |   # }
260 | 
261 | 
262 |   ######################################
263 |   ## Methods with MISSING data (in D) ##     #####     #####     #####     #####     #####     #####     #####     #####     #####
264 |   ######################################
265 | 
266 |   #########
267 |   ## NJ* ##
268 |   #########
269 |   if(method=="nj*"){
270 |     tree <- njs(D)
271 |     #tree <- midpoint(ladderize(tree))
272 |     ## Always work with tree in pruningwise order:
273 |     tree <- reorder.phylo(tree, order="pruningwise")
274 |     ## Trees must be rooted:
275 |     if(!is.rooted(tree)) tree <- midpoint(tree)
276 |     if(plot==TRUE){
277 |       plot(tree, edge.width=2)
278 |       title("Neighbour-joining* tree")
279 |       axisPhylo()
280 |     }
281 |   }
282 |   ############
283 |   ## BIONJ* ##
284 |   ############
285 |   if(method=="bionj*"){
286 |     tree <- bionjs(D)
287 |     #tree <- midpoint(ladderize(tree))
288 |     ## Always work with tree in pruningwise order:
289 |     tree <- reorder.phylo(tree, order="pruningwise")
290 |     ## Trees must be rooted:
291 |     if(!is.rooted(tree)) tree <- midpoint(tree)
292 |     if(plot==TRUE){
293 |       plot(tree, edge.width=2)
294 |       title("BIONJ* tree")
295 |       axisPhylo()
296 |     }
297 |   }
298 | 
299 | 
300 |   par(ask=FALSE)
301 | 
302 |   return(tree)
303 | } # end tree.reconstruct
304 | 
305 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <!--
 2 | ---
 3 | output:
 4 |   html_document:
 5 |     keep_md: yes
 6 | ---
 7 | 
 8 | -->
 9 | 
10 | # *treeWAS*: A phylogenetic tree-based approach to genome-wide association studies in microbes
11 | 
12 | 
13 | <!-- ########################################################################################################## -->
14 | ## Introduction
15 | <!-- ########################################################################################################## -->
16 | 
17 | The *treeWAS* R package allows users to apply our phylogenetic tree-based approach to Genome-Wide Association Studies (GWAS) to microbial genetic and phenotypic data. 
18 | In short, *treeWAS* measures the statistical association between a phenotype of interest and the genotype at all loci, with the aim of identifying significant associations, while correcting for the confounding effects of clonal population structure and homologous recombination. 
19 | *treeWAS* is applicable to both bacterial and viral genetic data from both the core and accessory genomes, and to both binary and continuous phenotypes. The approach adopted within *treeWAS* is described fully in our paper, available in [PLOS Computational Biology](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005958).
20 | 
21 | 
22 | 
23 | ***
24 | 
25 | <!-- ########################################################################################################## -->
26 | ## Installation
27 | <!-- ########################################################################################################## -->
28 | 
29 | *treeWAS* is currently hosted on GitHub at <https://github.com/caitiecollins/treeWAS>.  
30 | <!-- ([https://github.com/caitiecollins/treeWAS](https://github.com/caitiecollins/treeWAS)).-->
31 | 
32 | The most up-to-date version of *treeWAS* can be easily installed directly within R, using the `devtools` package: 
33 | 
34 | 
35 | ```{r, eval=FALSE, highlight=TRUE}
36 | ## install devtools, if necessary:
37 | install.packages("devtools", dep=TRUE)
38 | library(devtools)
39 | 
40 | ## install treeWAS from github:
41 | install_github("caitiecollins/treeWAS", build_vignettes = TRUE)
42 | library(treeWAS)
43 | ```
44 | 
45 | 
46 | ***
47 | 
48 | <!-- ########################################################################################################## -->
49 | ## Documentation
50 | <!-- ########################################################################################################## -->
51 | 
52 | Documentation on how to use *treeWAS* can be found on GitHub in [the Wiki](https://github.com/caitiecollins/treeWAS/wiki). 
53 | 
54 | 
55 | The Wiki contains sections on [The Method](https://github.com/caitiecollins/treeWAS/wiki/1.-How-treeWAS-Works) behind *treeWAS*, 
56 | the [Data & Data Cleaning](https://github.com/caitiecollins/treeWAS/wiki/2.-Data-&-Data-Cleaning) required, 
57 | the [treeWAS Function & Arguments](https://github.com/caitiecollins/treeWAS/wiki/3.-treeWAS-Function-&-Arguments), 
58 | a guide to [Interpreting Output](https://github.com/caitiecollins/treeWAS/wiki/4.-Interpreting-Output) returned by *treeWAS*, 
59 | functions to facilitate [Integration with ClonalFrameML](https://github.com/caitiecollins/treeWAS/wiki/5.-ClonalFrameML-Integration), 
60 | and information describing how to flag [Bugs & Features](https://github.com/caitiecollins/treeWAS/wiki/6.-Bugs-&-Features).
61 | 
62 | 
63 | 
64 | Once you have installed and loaded the *treeWAS* package, you can also find this information in the vignette. 
65 | To open the vignette from within R (recommended if any formatted elements are not rendering properly in the wiki), 
66 | run `browseVignettes` and click on the `HTML` hyperlink:
67 | 
68 | 
69 | ```{r, eval=FALSE}
70 | browseVignettes("treeWAS")
71 | ```
72 | 
73 | You may also find useful tips and relevant discussions among the [Issues](https://github.com/caitiecollins/treeWAS/issues) posted by other users (including [Closed Issues](https://github.com/caitiecollins/treeWAS/issues?q=is%3Aissue+is%3Aclosed)).
74 | 
75 | 
76 | ***
77 | 


--------------------------------------------------------------------------------
/data/dist_0.01.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.01.rda


--------------------------------------------------------------------------------
/data/dist_0.05.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.05.rda


--------------------------------------------------------------------------------
/data/dist_0.1.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.1.rda


--------------------------------------------------------------------------------
/data/dist_0.2.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.2.rda


--------------------------------------------------------------------------------
/data/dist_0.25.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.25.rda


--------------------------------------------------------------------------------
/data/dist_0.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/dist_0.rda


--------------------------------------------------------------------------------
/data/phen.cont.rank.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.cont.rank.rda


--------------------------------------------------------------------------------
/data/phen.cont.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.cont.rda


--------------------------------------------------------------------------------
/data/phen.plot.col.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.plot.col.rda


--------------------------------------------------------------------------------
/data/phen.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.rda


--------------------------------------------------------------------------------
/data/phen.reconstruction.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/phen.reconstruction.rda


--------------------------------------------------------------------------------
/data/snps.assoc.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/snps.assoc.rda


--------------------------------------------------------------------------------
/data/snps.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/snps.rda


--------------------------------------------------------------------------------
/data/snps.reconstruction.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/snps.reconstruction.rda


--------------------------------------------------------------------------------
/data/tree.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/tree.rda


--------------------------------------------------------------------------------
/data/treeWAS.example.out.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/data/treeWAS.example.out.rda


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citHeader("To cite the treeWAS package:")
 2 | 
 3 | ## Method reference:
 4 | citEntry(
 5 |   entry="Article",
 6 |   title = "A phylogenetic method to perform genome-wide association studies in microbes that accounts for population structure and recombination",
 7 |   journal= "PLOS Computational Biology",
 8 |   year = "2018",
 9 |   author = c(person(c("Caitlin"), "Collins"),
10 |                     person(c("Xavier"), "Didelot")),
11 |   volume   = "14",
12 |   number   = "2",
13 |   pages    = "1-21",
14 |   doi = "10.1371/journal.pcbi.1005958",
15 |   textVersion = "Collins C, Didelot X (2018). 'A phylogenetic method to perform genome-wide association studies in
16 |   microbes that accounts for population structure and recombination.' PLOS Computational Biology,
17 |   *14*(2), 1-21. doi:10.1371/journal.pcbi.1005958 <https://doi.org/10.1371/journal.pcbi.1005958>."
18 | )
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/man/asr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reconstruct.R
 3 | \name{asr}
 4 | \alias{asr}
 5 | \title{Ancestral state reconstruction}
 6 | \usage{
 7 | asr(
 8 |   var,
 9 |   tree,
10 |   type = c("parsimony", "ML", "ace"),
11 |   method = c("discrete", "continuous"),
12 |   unique.cols = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{var}{Either a matrix or a vector containing the state of a variable (eg. SNPs or a phenotype)
17 | for all individuals (ie. for all terminal nodes in the tree).}
18 | 
19 | \item{tree}{A phylo object containing the tree representing the ancestral relationships
20 | between the individuals for which snps and phen are known.}
21 | 
22 | \item{type}{A character string specifying whether ancestral state reconstruction should be
23 | performed by \code{parsimony} or \code{ML} (as performed by the \code{ace} function in package \emph{ape}).}
24 | 
25 | \item{method}{A character string specifying the type of ASR method to implement,
26 | either \code{'discrete'} or \code{'continuous'} (only used if \code{type} is set to "ML").}
27 | 
28 | \item{unique.cols}{A logical indicating whether only unique column patterns are present in \code{var},
29 | if \code{var} is a matrix (if so (\code{TRUE}), a time-consuming step can be skipped);
30 | by default, \code{FALSE}.}
31 | }
32 | \value{
33 | Depending on the dimensions of the input \code{var} object,
34 | either a matrix or a vector containing \emph{both} the known states
35 | of the variable at the terminal nodes (in positions 1:Nterminal) and the
36 | inferred states at internal nodes (in positions (Nterminal+1):Ntotal).
37 | }
38 | \description{
39 | Reconstruct the ancestral states of a vector or matrix object by using either
40 | parsimony or maximum-likelihood methods to infer the states
41 | at the internal nodes of a phylogenetic tree.
42 | }
43 | \author{
44 | Caitlin Collins \email{caitiecollins@gmail.com}
45 | }
46 | 


--------------------------------------------------------------------------------
/man/assoc.test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get.sig.snps.R
 3 | \name{assoc.test}
 4 | \alias{assoc.test}
 5 | \title{Run a test of association between SNPs and a phenotype.}
 6 | \usage{
 7 | assoc.test(
 8 |   snps,
 9 |   phen,
10 |   tree = NULL,
11 |   test = c("terminal", "simultaneous", "subsequent"),
12 |   correct.prop = FALSE,
13 |   categorical = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{snps}{A matrix containing the real snps.}
18 | 
19 | \item{phen}{A factor or vector containing the phenotype (only allowed to contain two levels for now).}
20 | 
21 | \item{test}{A character string or vector containing one or more of the following available tests of association:
22 | "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the first three tests are run.
23 | See details for more information on what these tests do and when they may be appropriate.}
24 | }
25 | \description{
26 | Run one of five tests of association between each column of a SNPs matrix and a phenotype
27 | (some tests only implemented for \emph{binary} SNPs and phenotype).
28 | }
29 | \author{
30 | Caitlin Collins \email{caitiecollins@gmail.com}
31 | }
32 | 


--------------------------------------------------------------------------------
/man/beeswarmPlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/beeswarm.plot.R
 3 | \name{beeswarmPlot}
 4 | \alias{beeswarmPlot}
 5 | \title{Beeswarm-and-Box-Plot.}
 6 | \usage{
 7 | beeswarmPlot(
 8 |   y = "sensitivity",
 9 |   x = "test",
10 |   df,
11 |   y.lab = NULL,
12 |   pt.size = 4,
13 |   x.text = FALSE,
14 |   x.text.size = 15,
15 |   y.text.size = 15,
16 |   y.title.size = 20,
17 |   y.lim = c(-0.002, 1.02),
18 |   mean = TRUE
19 | )
20 | }
21 | \arguments{
22 | \item{y}{A character string specifying the label of the (numeric) column
23 | in data frame \code{df} to be plotted along the y-axis.}
24 | }
25 | \description{
26 | Wrapper combining the beeswarm and box plot functions from packages \code{beeswarm} and \code{ggplot2}.
27 | }
28 | \author{
29 | Caitlin Collins \email{caitiecollins@gmail.com}
30 | }
31 | 


--------------------------------------------------------------------------------
/man/coalescent.sim.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/coalescent.sim.R
  3 | \name{coalescent.sim}
  4 | \alias{coalescent.sim}
  5 | \title{Simulate a tree, phenotype, and genetic data.}
  6 | \usage{
  7 | coalescent.sim(
  8 |   n.ind = 100,
  9 |   n.snps = 10000,
 10 |   n.subs = 1,
 11 |   n.snps.assoc = 0,
 12 |   assoc.prob = 100,
 13 |   n.phen.subs = 15,
 14 |   phen = NULL,
 15 |   plot = TRUE,
 16 |   heatmap = FALSE,
 17 |   reconstruct = FALSE,
 18 |   dist.dna.model = "JC69",
 19 |   grp.min = 0.25,
 20 |   row.names = TRUE,
 21 |   set = 1,
 22 |   tree = NULL,
 23 |   coaltree = TRUE,
 24 |   s = 20,
 25 |   af = 10,
 26 |   filename.plot = NULL,
 27 |   seed = NULL
 28 | )
 29 | }
 30 | \arguments{
 31 | \item{n.ind}{An integer specifying the number of individual genomes to simulate
 32 | (ie. the number of terminal nodes in the tree).}
 33 | 
 34 | \item{n.snps}{An integer specifying the number of genetic loci to simulate.}
 35 | 
 36 | \item{n.subs}{Either an integer or a vector (containing a distribution) that is
 37 | used to determine the number of substitutions
 38 | to occur on the phylogenetic tree for each genetic locus (see details).}
 39 | 
 40 | \item{n.snps.assoc}{An optional integer specifying the number of genetic loci to be simulated as associated with the phenotype; by default, 0.}
 41 | 
 42 | \item{assoc.prob}{An optional integer (> 0, <= 100) specifying the strength of the
 43 | association between the n.snps.assoc loci and the phenotype (see details).}
 44 | 
 45 | \item{n.phen.subs}{An integer specifying the expected number of phenotypic
 46 | substitutions to occur on the phylogenetic tree (through the same process as
 47 | the n.subs parameter when n.subs is an integer (see details)).}
 48 | 
 49 | \item{phen}{An optional vector containing a phenotype for each of the
 50 | n.ind individuals if no phenotypic simulation is desired.}
 51 | 
 52 | \item{plot}{A logical indicating whether to generate a plot of the phylogenetic tree (\code{TRUE}, the default) or not (\code{FALSE}).}
 53 | 
 54 | \item{heatmap}{A logical indicating whether to produce a heatmap of the genetic distance
 55 | between the simulated genomes of the n.ind individuals.}
 56 | 
 57 | \item{reconstruct}{Either a logical indicating whether to attempt to reconstruct
 58 | a phylogenetic tree using the simulated genetic data, or one of c("UPGMA", "nj", "ml")
 59 | to specify that tree reconstruction is desired by one of these three methods
 60 | (Unweighted Pair Group Method with Arithmetic Mean, Neighbour-Joining, Maximum-Likelihood).}
 61 | 
 62 | \item{dist.dna.model}{A character string specifying the type of model to use in reconstructing the phylogenetic tree for
 63 | calculating the genetic distance between individual genomes, only used if \code{tree} is
 64 | a character string (see ?dist.dna).}
 65 | 
 66 | \item{grp.min}{An optional number between 0.1 and 0.9 to control the proportional size of the smaller phenotypic group.}
 67 | 
 68 | \item{row.names}{An optional vector containing row names for the individuals to be simulated.}
 69 | 
 70 | \item{set}{An integer (1, 2, or 3) required to select the method of generating associated loci if \code{n.snps.assoc} is not zero.}
 71 | 
 72 | \item{coaltree}{A logical indicating whether to generate a coalescent tree (\code{TRUE}, the default),
 73 | or an rtree-type tree (\code{FALSE}, see ?rtree).}
 74 | 
 75 | \item{s}{If \code{set} is 3, the \code{s} parameter controls a baseline number of substitutions to be
 76 | experienced by the phenotype and associated loci: by default, 20.}
 77 | 
 78 | \item{af}{If \code{set} is 3, the \code{af} parameter provides an association factor,
 79 | controlling the preference for association over non-association at associated loci:  by default, 10 (for a 10x preference).}
 80 | 
 81 | \item{filename.plot}{An optional character string denoting the file location for saving any plots produced; else \code{NULL}.}
 82 | 
 83 | \item{seed}{An optional integer to control the pseudo-randomisation process and allow for identical repeat runs of the function;
 84 | else \code{NULL}.}
 85 | }
 86 | \description{
 87 | This function allows the user to simulate a phylogenetic tree, as well as
 88 | phenotypic and genetic data, including associated and unassociated loci.
 89 | }
 90 | \details{
 91 | \strong{Homoplasy Distribution}
 92 | 
 93 | The homoplasy distribution contains the number of substitutions per site.
 94 | 
 95 | If the value of the \code{n.subs} parameter is set to an integer, this integer is
 96 | used as the parameter of a Poisson distribution from which the number of substitutions to
 97 | occur on the phylogenetic tree is drawn for each of the \code{n.snps} simulated genetic loci.
 98 | 
 99 | The \code{n.subs} argument can also be used to provide a distribution
100 | to define the number of substitutions per site.
101 | 
102 | It must be in the form of a \emph{named} vector (or table), or a vector in which the \emph{i}'th element
103 | contains the number of \emph{loci} that have been estimated to undergo \emph{i} substitutions on the tree.
104 | The vector must be of length \emph{max n.subs}, and "empty" indices must contain zeros.
105 | For example: the vector \code{n.subs = c(1833, 642, 17, 6, 1, 0, 0, 1)},
106 | could be used to define the homoplasy distribution for a dataset with 2500 loci,
107 | where the maximum number of substitutions to be undergone on the tree by any locus is 8,
108 | and no loci undergo either 6 or 7 substitutions.
109 | 
110 | 
111 | \strong{Association Probability}
112 | 
113 | The \code{assoc.prob} parameter is only functional when \code{set} is set to 1.
114 | If so, \code{assoc.prob} controls the strength of association through a process analogous to dilution.
115 | All \code{n.snps.assoc} loci are initially simulated to undergo a substitution
116 | every time the phenotype undergoes a substitution (ie. perfect association).
117 | The assoc.prob parameter then acts like a dilution factor, removing \code{(100 - assoc.prob)\%}
118 | of the substitutions that occurred during simulation under perfect association.
119 | }
120 | \examples{
121 | \dontrun{
122 | ## load example homoplasy distribution
123 | data(dist_0)
124 | str(dist_0)
125 | 
126 | ## simulate a matrix with 10 associated loci:
127 | dat <- coalescent.sim(n.ind = 100,
128 |                         n.snps = 1000,
129 |                         n.subs = dist_0,
130 |                         n.snps.assoc = 10,
131 |                         assoc.prob = 90,
132 |                         n.phen.subs = 15,
133 |                         phen = NULL,
134 |                         plot = TRUE,
135 |                         heatmap = FALSE,
136 |                         reconstruct = FALSE,
137 |                         dist.dna.model = "JC69",
138 |                         grp.min = 0.25,
139 |                         row.names = NULL,
140 |                         coaltree = TRUE,
141 |                         s = NULL,
142 |                         af = NULL,
143 |                         filename = NULL,
144 |                         set = 1,
145 |                         seed = 1)
146 | 
147 | ## examine output:
148 | str(dat)
149 | 
150 | ## isolate elements of output:
151 | snps <- dat$snps
152 | phen <- dat$phen
153 | snps.assoc <- dat$snps.assoc
154 | tree <- dat$tree
155 | }
156 | }
157 | \author{
158 | Caitlin Collins \email{caitiecollins@gmail.com}
159 | }
160 | 


--------------------------------------------------------------------------------
/man/coalescent.tree.sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/coalescent.tree.sim.R
 3 | \name{coalescent.tree.sim}
 4 | \alias{coalescent.tree.sim}
 5 | \title{Simulate a coalescent tree.}
 6 | \usage{
 7 | coalescent.tree.sim(n.ind = 100, seed = NULL)
 8 | }
 9 | \arguments{
10 | \item{n.ind}{An integer specifying the number of terminal nodes desired.}
11 | 
12 | \item{seed}{An optional integer controlling the pseudo-random process underlying the tree generation.}
13 | }
14 | \description{
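15 | Simulate a phylogenetic tree with \code{n.ind} terminal nodes; the optional \code{seed} argument controls the pseudo-random process so that the same tree can be regenerated.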
16 | }
17 | \examples{
18 | 
19 | ## basic use of fn
20 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
21 | 
22 | ## plot output
23 | plot(tree)
24 | 
25 | }
26 | \author{
27 | Caitlin Collins \email{caitiecollins@gmail.com}
28 | }
29 | 


--------------------------------------------------------------------------------
/man/dist_0.01.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{dist_0.01}
 5 | \alias{dist_0.01}
 6 | \title{Nsubs per site with limited recombination (R = 0.01; M = 0.01).}
 7 | \format{
 8 | A named vector of length 15.
 9 | }
10 | \usage{
11 | data(dist_0.01)
12 | }
13 | \description{
14 | This vector contains a homoplasy distribution,
15 | representing the relative number of substitutions per site
16 | that occurred along a phylogenetic tree when evolution was
17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.01
18 | (\emph{r/m} = 1).
19 | }
20 | \details{
21 | A per-site mutation rate of M = 0.01
22 | indicates that each site, on average, undergoes 0.01
23 | substitutions due to mutation along the phylogenetic tree.
24 | A per-site recombination rate of R = 0.01
25 | indicates that each site, on average, undergoes 0.01
26 | substitutions per site due to within-species recombination.
27 | 
28 | Each element of the vector indicates the number of genetic loci
29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i).
30 | 
31 | If visualised as a bar plot (with \code{barplot(dist_0.01)}),
32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram
33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site),
34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree).
35 | }
36 | \author{
37 | Caitlin Collins \email{caitiecollins@gmail.com}
38 | }
39 | \keyword{data}
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/man/dist_0.05.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{dist_0.05}
 5 | \alias{dist_0.05}
 6 | \title{Nsubs per site with recombination (R = 0.05; M = 0.01).}
 7 | \format{
 8 | A named vector of length 26.
 9 | }
10 | \usage{
11 | data(dist_0.05)
12 | }
13 | \description{
14 | This vector contains a homoplasy distribution,
15 | representing the relative number of substitutions per site
16 | that occurred along a phylogenetic tree when evolution was
17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.05
18 | (\emph{r/m} = 5).
19 | }
20 | \details{
21 | A per-site mutation rate of M = 0.01
22 | indicates that each site, on average, undergoes 0.01
23 | substitutions due to mutation along the phylogenetic tree.
24 | A per-site recombination rate of R = 0.05
25 | indicates that each site, on average, undergoes 0.05
26 | substitutions per site due to within-species recombination.
27 | 
28 | Each element of the vector indicates the number of genetic loci
29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i).
30 | 
31 | If visualised as a bar plot (with \code{barplot(dist_0.05)}),
32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram
33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site),
34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree).
35 | }
36 | \author{
37 | Caitlin Collins \email{caitiecollins@gmail.com}
38 | }
39 | \keyword{data}
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/man/dist_0.1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{dist_0.1}
 5 | \alias{dist_0.1}
 6 | \title{Nsubs per site with recombination (R = 0.1; M = 0.01).}
 7 | \format{
 8 | A named vector of length 31.
 9 | }
10 | \usage{
11 | data(dist_0.1)
12 | }
13 | \description{
14 | This vector contains a homoplasy distribution,
15 | representing the relative number of substitutions per site
16 | that occurred along a phylogenetic tree when evolution was
17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.1
18 | (\emph{r/m} = 10).
19 | }
20 | \details{
21 | A per-site mutation rate of M = 0.01
22 | indicates that each site, on average, undergoes 0.01
23 | substitutions due to mutation along the phylogenetic tree.
24 | A per-site recombination rate of R = 0.1
25 | indicates that each site, on average, undergoes 0.1
26 | substitutions per site due to within-species recombination.
27 | 
28 | Each element of the vector indicates the number of genetic loci
29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i).
30 | 
31 | If visualised as a bar plot (with \code{barplot(dist_0.1)}),
32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram
33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site),
34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree).
35 | }
36 | \author{
37 | Caitlin Collins \email{caitiecollins@gmail.com}
38 | }
39 | \keyword{data}
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/man/dist_0.2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{dist_0.2}
 5 | \alias{dist_0.2}
 6 | \title{Nsubs per site with considerable recombination (R = 0.2; M = 0.01).}
 7 | \format{
 8 | A named vector of length 30.
 9 | }
10 | \usage{
11 | data(dist_0.2)
12 | }
13 | \description{
14 | This vector contains a homoplasy distribution,
15 | representing the relative number of substitutions per site
16 | that occurred along a phylogenetic tree when evolution was
17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.2
18 | (\emph{r/m} = 20).
19 | }
20 | \details{
21 | A per-site mutation rate of M = 0.01
22 | indicates that each site, on average, undergoes 0.01
23 | substitutions due to mutation along the phylogenetic tree.
24 | A per-site recombination rate of R = 0.2
25 | indicates that each site, on average, undergoes 0.2
26 | substitutions per site due to within-species recombination.
27 | 
28 | Each element of the vector indicates the number of genetic loci
29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i).
30 | 
31 | If visualised as a bar plot (with \code{barplot(dist_0.2)}),
32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram
33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site),
34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree).
35 | }
36 | \author{
37 | Caitlin Collins \email{caitiecollins@gmail.com}
38 | }
39 | \keyword{data}
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/man/dist_0.25.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{dist_0.25}
 5 | \alias{dist_0.25}
 6 | \title{Nsubs per site with considerable recombination (R = 0.25; M = 0.01).}
 7 | \format{
 8 | A named vector of length 34.
 9 | }
10 | \usage{
11 | data(dist_0.25)
12 | }
13 | \description{
14 | This vector contains a homoplasy distribution,
15 | representing the relative number of substitutions per site
16 | that occurred along a phylogenetic tree when evolution was
17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0.25
18 | (\emph{r/m} = 25).
19 | }
20 | \details{
21 | A per-site mutation rate of M = 0.01
22 | indicates that each site, on average, undergoes 0.01
23 | substitutions due to mutation along the phylogenetic tree.
24 | A per-site recombination rate of R = 0.25
25 | indicates that each site, on average, undergoes 0.25
26 | substitutions per site due to within-species recombination.
27 | 
28 | Each element of the vector indicates the number of genetic loci
29 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i).
30 | 
31 | If visualised as a bar plot (with \code{barplot(dist_0.25)}),
32 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram
33 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site),
34 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree).
35 | }
36 | \author{
37 | Caitlin Collins \email{caitiecollins@gmail.com}
38 | }
39 | \keyword{data}
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/man/dist_0.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{dist_0}
 5 | \alias{dist_0}
 6 | \title{Nsubs per site with no recombination (R = 0; M = 0.01).}
 7 | \format{
 8 | A named vector of length 4.
 9 | }
10 | \usage{
11 | data(dist_0)
12 | }
13 | \description{
14 | This vector contains a homoplasy distribution,
15 | representing the relative number of substitutions per site
16 | that occurred along a phylogenetic tree when evolution was
17 | simulated with a mutation rate of M = 0.01 and a recombination rate of R = 0
18 | (\emph{r/m} = 0).
19 | }
20 | \details{
21 | A per-site mutation rate of M = 0.01
22 | indicates that each site, on average, undergoes 0.01
23 | substitutions due to mutation along the phylogenetic tree.
24 | A per-site recombination rate of R = 0
25 | indicates that no recombination occurred.
26 | 
27 | Each element of the vector indicates the number of genetic loci
28 | that have undergone the number of substitutions indicated by the name of that element (Nsub = i).
29 | 
30 | If visualised as a bar plot (with \code{barplot(dist_0)}),
31 | one would see that the Nsub distribution is arranged as if it were the counts of a histogram
32 | with index names along the x-axis, corresponding to Nsub (the number of substitutions per site),
33 | and cell counts along the y-axis, showing Nloci (the number of genetic sites undergoing Nsub=i substitutions along the tree).
34 | }
35 | \author{
36 | Caitlin Collins \email{caitiecollins@gmail.com}
37 | }
38 | \keyword{data}
39 | \keyword{datasets}
40 | 


--------------------------------------------------------------------------------
/man/fwd.coalescent.sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fwd.coalescent.sim.R
 3 | \name{fwd.coalescent.sim}
 4 | \alias{fwd.coalescent.sim}
 5 | \title{Short one-phrase description.}
 6 | \usage{
 7 | fwd.coalescent.sim(
 8 |   n.ind = 100,
 9 |   n.snps = 10000,
10 |   n.subs = 1,
11 |   n.snps.assoc = 10,
12 |   n.subs.assoc = 15,
13 |   p = 1,
14 |   heatmap = FALSE,
15 |   reconstruct = FALSE,
16 |   dist.dna.model = "JC69",
17 |   seed = 1
18 | )
19 | }
20 | \arguments{
21 | \item{n.ind}{An integer specifying the number of individual genomes to simulate
22 | (ie. the number of terminal nodes in the tree).}
23 | 
24 | \item{n.snps}{An integer specifying the number of genetic loci to simulate.}
25 | 
26 | \item{n.subs}{Either an integer or a vector (containing a distribution) that is
27 | used to determine the number of substitutions
28 | to occur on the phylogenetic tree for each genetic locus (see details).}
29 | 
30 | \item{n.snps.assoc}{An optional integer specifying the number of genetic loci to be associated with the phenotype.}
31 | 
32 | \item{heatmap}{A logical indicating whether to produce a heatmap of the genetic distance
33 | between the simulated genomes of the n.ind individuals.}
34 | 
35 | \item{reconstruct}{Either a logical indicating whether to attempt to reconstruct
36 | a phylogenetic tree using the simulated genetic data, or one of c("UPGMA", "nj", "ml")
37 | to specify that tree reconstruction is desired by one of these three methods
38 | (Unweighted Pair Group Method with Arithmetic Mean, Neighbour-Joining, Maximum-Likelihood).}
39 | 
40 | \item{seed}{An optional integer controlling the pseudo-random process of simulation. Two
41 | instances of coalescent.sim with the same seed and arguments will produce identical output.}
42 | 
43 | \item{assoc.prob}{An optional integer (> 0, <= 100) specifying the strength of the
44 | association between the n.snps.assoc loci and the phenotype (see details).}
45 | 
46 | \item{n.phen.subs}{An integer specifying the expected number of phenotypic
47 | substitutions to occur on the phylogenetic tree (through the same process as
48 | the n.subs parameter when n.subs is an integer (see details)).}
49 | 
50 | \item{phen}{An optional vector containing a phenotype for each of the
51 | n.ind individuals if no phenotypic simulation is desired.}
52 | }
53 | \description{
54 | Longer proper description of function...
55 | }
56 | \details{
57 | #### n.subs ####
58 | If the value of the n.subs parameter is set to an integer, this integer is
59 | used as the parameter of a Poisson distribution from which the number of substitutions to
60 | occur on the phylogenetic tree is drawn for each of the n.snps simulated genetic loci.
61 | If n.subs is a vector containing a distribution, this is used directly (in proportion to n.snps)
62 | to define the number of substitutions per site. For example, if n.subs=c(3000, 900, 70, 20, 0, 10)
63 | and n.snps=8000, then 6000 simulated sites will undergo exactly
64 | one substitution somewhere on the phylogenetic tree, 1800 will undergo two,
65 | 140 three, 40 four, 0 five, and 20 six.
66 | #### assoc.prob ####
67 | The assoc.prob parameter controls the strength of association through a process analogous to dilution.
68 | All n.snps.assoc loci are initially simulated to undergo a substitution
69 | every time the phenotype undergoes a substitution (ie. perfect association).
70 | The assoc.prob parameter then acts like a dilution factor, removing (100 - assoc.prob)\%
71 | of the substitutions that occurred during simulation under perfect association.
72 | }
73 | \author{
74 | Caitlin Collins \email{caitiecollins@gmail.com}
75 | }
76 | 


--------------------------------------------------------------------------------
/man/fwd.phen.sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fwd.phen.sim.R
 3 | \name{fwd.phen.sim}
 4 | \alias{fwd.phen.sim}
 5 | \title{Simulate a phenotype, from root to tips.}
 6 | \usage{
 7 | fwd.phen.sim(snps.assoc, p = 1, tree = NULL)
 8 | }
 9 | \arguments{
10 | \item{snps.assoc}{A matrix created by the \code{fwd.snp.sim} function,
11 | which indicates where genotypic substitutions occur on the tree at phenotypically-associated sites.
12 | 
13 | \item{p}{An integer specifying the probability of phenotypic substitution,
14 | given genotypic substitution (see details).}
15 | 
16 | \item{tree}{A phylo object.}
17 | }
18 | \description{
19 | [*An exploratory function:*] Having already simulated a genotype,
20 | this function allows you to simulate an associated phenotype along the tree, from root to tips.
21 | }
22 | \details{
23 | The parameter \code{p} controls the simulation of the phenotype by specifying
24 | the expected value of the number of phenotypic substitutions to occur on the tree provided,
25 | given that a genotypic substitution has occurred on a particular branch of the tree.
26 | }
27 | \examples{
28 | 
29 | ## basic use of fn
30 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
31 | 
32 | ## plot output
33 | plot(tree)
34 | 
35 | }
36 | \author{
37 | Caitlin Collins \email{caitiecollins@gmail.com}
38 | }
39 | 


--------------------------------------------------------------------------------
/man/fwd.snp.sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fwd.snp.sim.R
 3 | \name{fwd.snp.sim}
 4 | \alias{fwd.snp.sim}
 5 | \title{Short one-phrase description.}
 6 | \usage{
 7 | fwd.snp.sim(
 8 |   n.snps = 10000,
 9 |   n.subs = 1,
10 |   n.snps.assoc = 0,
11 |   n.subs.assoc = 15,
12 |   tree = coalescent.tree.sim(100),
13 |   heatmap = FALSE,
14 |   reconstruct = FALSE,
15 |   dist.dna.model = "JC69",
16 |   seed = 1
17 | )
18 | }
19 | \arguments{
20 | \item{snps}{description.}
21 | }
22 | \description{
23 | Longer proper description of function...
24 | }
25 | \author{
26 | Caitlin Collins \email{caitiecollins@gmail.com}
27 | }
28 | 


--------------------------------------------------------------------------------
/man/get.ancestral.pars.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reconstruct.R
 3 | \name{get.ancestral.pars}
 4 | \alias{get.ancestral.pars}
 5 | \title{Ancestral sequence reconstruction via parsimony}
 6 | \usage{
 7 | get.ancestral.pars(var, tree, unique.cols = FALSE)
 8 | }
 9 | \arguments{
10 | \item{var}{A matrix or vector containing a variable whose state at ancestral nodes we want to infer.}
11 | 
12 | \item{tree}{A phylo object containing a phylogenetic tree whose tips contain the same individuals as are
13 | in the elements of \code{var}, if \code{var} is a vector,
14 | or in the rows of \code{var}, if \code{var} is a matrix.}
15 | }
16 | \description{
17 | A wrapper for the \code{ancestral.pars} function from \emph{ape}. Can perform
18 | parsimonious ASR for variables in matrix or vector form.
19 | }
20 | \details{
21 | Note that the (row)names of \code{var} should match the tip.labels of \code{tree}.
22 | }
23 | \author{
24 | Caitlin Collins \email{caitiecollins@gmail.com}
25 | }
26 | 


--------------------------------------------------------------------------------
/man/get.assoc.scores.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get.sig.snps.R
 3 | \name{get.assoc.scores}
 4 | \alias{get.assoc.scores}
 5 | \title{Get significant SNPs, according to a given test of association.}
 6 | \usage{
 7 | get.assoc.scores(
 8 |   snps,
 9 |   snps.sim,
10 |   phen,
11 |   tree,
12 |   test = "terminal",
13 |   correct.prop = FALSE,
14 |   categorical = FALSE,
15 |   snps.reconstruction = NULL,
16 |   snps.sim.reconstruction = NULL,
17 |   phen.reconstruction = NULL,
18 |   unique.cols = FALSE
19 | )
20 | }
21 | \arguments{
22 | \item{snps}{A matrix containing the real snps.}
23 | 
24 | \item{snps.sim}{A matrix or list of matrices containing simulated snps.}
25 | 
26 | \item{phen}{A factor or vector containing the phenotype (only allowed to contain two levels for now).}
27 | 
28 | \item{tree}{A phylo object containing a phylogenetic tree in which the number of tips is equal to the
29 | length of \code{phen} and the number of rows of \code{snps} and \code{snps.sim}.}
30 | 
31 | \item{test}{A character string or vector containing one or more of the following available tests of association:
32 | "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the terminal test is run
33 | (note that within treeWAS, the first three tests are run in a loop by default).
34 | See details for more information on what these tests do and when they may be appropriate.}
35 | 
36 | \item{correct.prop}{A logical indicating whether the \code{"terminal"} and \code{"subsequent"} tests will be corrected for
37 | phenotypic class imbalance. Recommended if the proportion of individuals varies significantly across
38 | the levels of the phenotype (if binary) or if the phenotype is skewed (if continuous).
39 | If \code{correct.prop} is \code{FALSE} (the default),
40 | the original versions of each test will be run as described in our
41 | \href{http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005958}{PLOS Computational Biology paper}.
42 | If \code{TRUE}, an alternate association metric (based on the phi correlation coefficient) is calculated
43 | across the terminal and all (internal and terminal) nodes, respectively.}
44 | 
45 | \item{categorical}{A logical indicating whether \code{phen} should be treated as a nominal categorical variable
46 | whose unique values should be treated as levels rather than as meaningful numbers.}
47 | }
48 | \description{
49 | Identify which SNPs are deemed to be significantly associated with a phenotype,
50 | according to a given test of association and p-value.
51 | (Serves as the treeWAS association testing function;
52 | runs the \code{assoc.test} function internally.)
53 | }
54 | \author{
55 | Caitlin Collins \email{caitiecollins@gmail.com}
56 | }
57 | 


--------------------------------------------------------------------------------
/man/get.binary.snps.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get.binary.snps}
 4 | \alias{get.binary.snps}
 5 | \title{Reduce a genetic data matrix to only necessary columns.}
 6 | \usage{
 7 | get.binary.snps(snps, force = FALSE)
 8 | }
 9 | \arguments{
10 | \item{snps}{A genetic data matrix.}
11 | }
12 | \description{
13 | Function to reduce a genetic data matrix containing multiple columns per locus
14 | to one column for each binary locus and N columns for each N-allelic non-binary locus.
15 | }
16 | \details{
17 | This function identifies the number of alleles at each locus by assuming that
18 | the allele of each column is contained in the last two characters of each column name.
19 | We recommend that the columns of \code{snps} be labelled using the following four suffixes:
20 | ".a", ".c", ".g", ".t" (e.g., "Locus_123243.a", "Locus_123243.g").
21 | If you are using an alternative naming convention,
22 | but the allele is also always being denoted using the last two characters
23 | (e.g., "Locus_123243_1", "Locus_123243_2"),
24 | the function will still work if you set the argument \code{force = TRUE}.
25 | Please also be careful not to accidentally remove any purposeful duplications with repeated names;
26 | for example, if you have deliberately duplicated unique columns
27 | (e.g., by expanding according to an index returned by ClonalFrameML).
28 | }
29 | \author{
30 | Caitlin Collins \email{caitiecollins@gmail.com}
31 | }
32 | 


--------------------------------------------------------------------------------
/man/get.fitch.n.mts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fitch.R
 3 | \name{get.fitch.n.mts}
 4 | \alias{get.fitch.n.mts}
 5 | \title{Calculate parsimony scores.}
 6 | \usage{
 7 | get.fitch.n.mts(x, tree, snps = NULL)
 8 | }
 9 | \arguments{
10 | \item{x}{A numeric matrix or vector containing two unique values with row.names matching tree tip.labels.}
11 | 
12 | \item{tree}{A phylo object.}
13 | }
14 | \description{
15 | Determine parsimony scores for all genetic loci, or a phenotypic variable, along a given tree.
16 | An extension of the fitch function available in package phangorn.
17 | }
18 | \examples{
19 | \dontrun{
20 | 
21 | ## generate a tree
22 | tree <- ape::rtree(100)
23 | ## generate snps, a matrix of 0s and 1s
24 | snps <- matrix(sample(c(0,1),100000,TRUE), nrow=100)
25 | row.names(snps) <- tree$tip.label
26 | 
27 | ## run function
28 | out <- get.fitch.n.mts(x=snps, tree)
29 | 
30 | ## examine output
31 | str(out)
32 | table(out)
33 | hist(out)
34 | }
35 | 
36 | }
37 | \author{
38 | Caitlin Collins \email{caitiecollins@gmail.com}
39 | }
40 | 


--------------------------------------------------------------------------------
/man/get.original.loci.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/readCFML.R
 3 | \name{get.original.loci}
 4 | \alias{get.original.loci}
 5 | \title{\code{(read.CFML+)} Get original sequence positions of polymorphic loci.}
 6 | \usage{
 7 | get.original.loci(
 8 |   seqs,
 9 |   dat,
10 |   sig.snps.names,
11 |   n.bp = 50,
12 |   suff.length = 2,
13 |   csv = TRUE,
14 |   csv.prefix = NULL,
15 |   NA.thresh = 0.2
16 | )
17 | }
18 | \arguments{
19 | \item{seqs}{A \code{DNAbin} object containing the original sequences
20 | input into ClonalFrameML (see details).}
21 | 
22 | \item{dat}{An object containing the output of the \code{read.CFML} function.}
23 | 
24 | \item{sig.snps.names}{A character vector containing the names of
25 | polymorphic loci whose original sequence positions you desire (see details).}
26 | 
27 | \item{n.bp}{An integer specifying the desired length of the flanking
28 | sequence to be returned; by default, 50 (see details).}
29 | 
30 | \item{suff.length}{An integer specifying the suffix length
31 | of \code{snps} elements; by default, 2 (see details).}
32 | 
33 | \item{csv}{A logical indicating whether to save the results as a CSV file.}
34 | 
35 | \item{csv.prefix}{An optional character vector specifying a directory and
36 | filename prefix for the CSV file (if \code{csv=TRUE}); default name/suffix, "sig_loci.csv".
37 | \emph{Please be careful: Any existing file of that name will be overwritten!}}
38 | 
39 | \item{NA.thresh}{A number between 0 and 1 indicating the max allowable
40 | proportion of NAs that the output sequence fragments can contain.
41 | (if a sequence fragment from row 1 exceeds this threshold,
42 | a sufficiently complete sequence fragment will be sought in subsequent rows); by default, 0.2.}
43 | }
44 | \value{
45 | \code{get.original.loci} returns a list containing:
46 | \enumerate{
47 | \item \code{loci}: The original sequence positions for all polymorphic loci in \code{seqs}.
48 | \item \code{loci.sig}: The original sequence positions for all polymorphic loci in \code{sig.snps.names}.
49 | \item \code{seq.sig}: A list of length \code{length(sig.snps.names)} containing sequence fragments of length \code{n.bp}.
50 | }
51 | }
52 | \description{
53 | If you ran \code{read.CFML} on ClonalFrameML output before running \code{treeWAS},
54 | this function can be used to identify the original sequence positions of your polymorphic loci.
55 | E.g., If \code{treeWAS} identified loci "1417.a" and "2017.g" as significant, \code{get.original.loci}
56 | can identify corresponding sequence positions "1165743" and "1741392" and return
57 | flanking sequence segments.
58 | }
59 | \details{
60 | \strong{seqs} must contain ClonalFrameML \emph{input*},
61 | which can be read in from fasta with \code{read.dna("FILENAME.fasta", format="fasta")}
62 | (*not the ClonalFrameML output file "ML_sequence.fasta" or the \code{seqs} element of \code{read.CFML} output).\cr\cr
63 | \strong{sig.snps.names} can contain any set of \code{colnames(snps)}, for example,
64 | the set of significant loci identified by \code{treeWAS} (\code{out$treeWAS.combined$treeWAS.combined}).\cr\cr
65 | \strong{n.bp} specifies the total length of flanking sequence
66 | (drawn from the first row of \code{seqs} only),
67 | half of which will be on either side of each locus in \code{sig.snps.names}.
68 | Each such sequence will be of total length \code{n.bp+1}, arranged (e.g., with \code{n.bp = 50}) as:\cr
69 | <---25bp---><locus.i><---25bp--->.\cr\cr
70 | \strong{suff.length} tells the \code{removeLastN} function how many characters are used to specify
71 | the allele in \code{sig.snps.names} and \code{colnames(snps)}. For names of the form:
72 | "1234.a", \code{suff.length = 2} (note that the decimal counts as a character).
73 | If \code{snps} names are purely numeric with no alleles indicated
74 | (i.e., they already match names in \code{seqs}), then set \code{suff.length = 0}.
75 | }
76 | \examples{
77 | ## Example ##
78 | \dontrun{
79 | fasta <- "./filename.fas"
80 | prefix <- "/filename.fas.out"
81 | 
82 | ## read in original fasta sequence:
83 | seqs <- read.dna(fasta, format="fasta")
84 | 
85 | ## load saved read.CFML output
86 | dat <- get(load(sprintf('\%s.read.CFML_dat.Rdata', prefix)))
87 | 
88 | ## get sig snps from treeWAS results
89 | sig.snps.names <- out$treeWAS.combined$treeWAS.combined
90 | 
91 | out <- get.original.loci(seqs, dat, sig.snps.names, n.bp=40, csv=T, csv.prefix="/filename")
92 | }
93 | 
94 | }
95 | \author{
96 | Caitlin Collins \email{caitiecollins@gmail.com}
97 | }
98 | 


--------------------------------------------------------------------------------
/man/get.score3.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/subsequent.test.R
 3 | \name{get.score3}
 4 | \alias{get.score3}
 5 | \title{Short one-phrase description.}
 6 | \usage{
 7 | get.score3(Pa, Pd, Sa, Sd, l = NULL)
 8 | }
 9 | \arguments{
10 | \item{Pa}{A numeric value containing either the state,
11 | or the probability of the state, of the phenotype at a given \emph{ancestral} node.}
12 | 
13 | \item{Pd}{A numeric value containing either the state,
14 | or the probability of the state, of the phenotype at a given \emph{descendant} node.}
15 | 
16 | \item{Sa}{A numeric value containing either the state,
17 | or the probability of the state, of SNPi at a given \emph{ancestral} node.}
18 | 
19 | \item{Sd}{A numeric value containing either the state,
20 | or the probability of the state, of SNPi at a given \emph{descendant} node.}
21 | 
22 | \item{l}{A numeric value specifying the length of the branch in the phylogenetic tree
23 | that joins the ancestral and descendant node.}
24 | }
25 | \description{
26 | Longer proper description of function...
27 | }
28 | \examples{
29 | ## Example ##
30 | \dontrun{
31 | ## basic use of fn
32 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
33 | }
34 | }
35 | \author{
36 | Caitlin Collins \email{caitiecollins@gmail.com}
37 | }
38 | 


--------------------------------------------------------------------------------
/man/get.sig.snps.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get.sig.snps.R
 3 | \name{get.sig.snps}
 4 | \alias{get.sig.snps}
 5 | \title{Get significant SNPs, according to a given test of association.}
 6 | \usage{
 7 | get.sig.snps(
 8 |   corr.dat,
 9 |   corr.sim,
10 |   snps.names,
11 |   test = "terminal",
12 |   p.value = 0.01,
13 |   p.value.correct = "bonf",
14 |   p.value.by = "count"
15 | )
16 | }
17 | \arguments{
18 | \item{corr.dat}{A vector containing the association score values, for a given association test, for the real data.}
19 | 
20 | \item{corr.sim}{A vector containing the association score values, for a given association test, for the simulated data.}
21 | 
22 | \item{snps.names}{The column names of the original \code{snps} matrix from which the association score values
23 | in \code{corr.dat} were derived.}
24 | 
25 | \item{test}{A character string or vector containing one or more of the following available tests of association:
26 | "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the terminal test is run
27 | (note that within treeWAS, the first three tests are run in a loop by default).
28 | See details for more information on what these tests do and when they may be appropriate.}
29 | 
30 | \item{p.value}{A single number specifying the p.value below which correlations are deemed to be 'significant'.}
31 | 
32 | \item{p.value.correct}{Specify if/how to correct for multiple testing:
33 | either FALSE, or one of 'bonf' or 'fdr' (indicating, respectively,
34 | the Bonferroni and False Discovery Rate corrections). By default, 'bonf' is selected.}
35 | 
36 | \item{p.value.by}{Specify how to determine the location of the p.value threshold:
37 | either 'count' or 'density' (indicating, respectively, that the p.value threshold should
38 | be determined by exact count or with the use of a density function).}
39 | }
40 | \description{
41 | Identify which SNPs are deemed to be significantly associated with a phenotype,
42 | according to a given test of association and p-value.
43 | (Serves as the treeWAS association testing function;
44 | runs the \code{assoc.test} function internally.)
45 | }
46 | \author{
47 | Caitlin Collins \email{caitiecollins@gmail.com}
48 | }
49 | 


--------------------------------------------------------------------------------
/man/get.tip.order.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get.tip.order}
 4 | \alias{get.tip.order}
 5 | \title{Get the order of the tip labels of a phylogenetic tree as plotted.}
 6 | \usage{
 7 | get.tip.order(tree, original.format = TRUE)
 8 | }
 9 | \arguments{
10 | \item{tree}{An object of class phylo containing a tree
11 | whose tip order is desired to be known.}
12 | 
13 | \item{original.format}{A logical, indicating whether to use the original
14 | format of this function (kept for consistency's sake) or the new format.
15 | (For now, if you find one isn't giving you sensible output,
16 | please try changing this argument.)}
17 | }
18 | \description{
19 | Longer proper description of function...
20 | }
21 | \author{
22 | Caitlin Collins \email{caitiecollins@gmail.com}
23 | }
24 | 


--------------------------------------------------------------------------------
/man/get.unique.matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{get.unique.matrix}
 4 | \alias{get.unique.matrix}
 5 | \title{Get unique rows/columns of a matrix with an index vector.}
 6 | \usage{
 7 | get.unique.matrix(data, MARGIN = 2, silent = TRUE)
 8 | }
 9 | \arguments{
10 | \item{data}{A matrix or data.frame, potentially containing
11 | non-unique patterns in its rows or columns.}
12 | 
13 | \item{MARGIN}{A single integer specifying the array margin to be held fixed.
14 | (To get unique \emph{rows}, select \code{MARGIN} = 1;
15 | for unique \emph{columns}, select \code{MARGIN} = 2.)}
16 | }
17 | \value{
18 | A list with the following elements:
19 | \itemize{
20 |    \item \code{index}: An index vector containing the indices (row numbers),
21 |          in a matrix composed only of unique rows,
22 |          to which each row in the original matrix maps.
23 |    \item \code{unique.data}: A new matrix
24 |          containing only the unique rows of the input matrix.
25 | }
26 | }
27 | \description{
28 | A wrapper for the \code{table.matrix} function that assigns consecutive
29 | row or column names to the output matrix's unique rows or columns.
30 | }
31 | \details{
32 | An extension of the base \code{unique.matrix} function,
33 | \code{get.unique.matrix} returns a unique matrix
34 | (by removing duplicate rows or columns), as well as
35 | an index vector that maps each row/column in the original matrix
36 | to the corresponding unique row or column in the deduplicated unique matrix.
37 | }
38 | \author{
39 | Caitlin Collins \email{caitiecollins@gmail.com}
40 | }
41 | 


--------------------------------------------------------------------------------
/man/ggplotbg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{ggplotbg}
 4 | \alias{ggplotbg}
 5 | \title{Mimic ggplot2 Background}
 6 | \usage{
 7 | ggplotbg(
 8 |   bg = transp("lightgray", 0.5),
 9 |   x.ax = FALSE,
10 |   y.ax = FALSE,
11 |   box = TRUE,
12 |   grid = TRUE,
13 |   grid.col = "white",
14 |   grid.nx = NULL,
15 |   grid.ny = NULL,
16 |   grid.lwd = 1,
17 |   grid.lty = 1
18 | )
19 | }
20 | \arguments{
21 | \item{bg}{The background colour, by default ``lightgray'' with 50\% transparency.}
22 | 
23 | \item{x.ax}{A logical specifying whether to re-draw the x-axis.}
24 | 
25 | \item{y.ax}{A logical specifying whether to re-draw the y-axis.}
26 | 
27 | \item{box}{A logical specifying whether to draw a box around the plotting area.}
28 | 
29 | \item{grid}{A logical specifying whether to draw a grid across the background within the plotting area.}
30 | 
31 | \item{grid.col}{The color of the gridlines, ``white'' by default. Only used if grid is set to TRUE.}
32 | 
33 | \item{grid.nx}{An optional integer to specify the number of gridlines to be drawn along the x-axis.}
34 | 
35 | \item{grid.ny}{An optional integer to specify the number of gridlines to be drawn along the y-axis.}
36 | 
37 | \item{grid.lwd}{An integer specifying the lwd (line weight) of the gridlines; by default, set to 1.}
38 | 
39 | \item{grid.lty}{An integer specifying the line type to be used for the gridlines; by default, set to 1 (i.e., solid lines).}
40 | }
41 | \description{
42 | Get an imitation ggplot2-style background for plots made outside ggplot2
43 | }
44 | \details{
45 | This function must be sandwiched between two instances
46 | of the function used to generate the (foreground) plot
47 | to which you are hoping to add this background.
48 | \emph{Before} running the \code{ggplotbg} function, you need to run your plot function
49 | so that \code{ggplotbg} knows how to set the axes.
50 | \emph{After} running the \code{ggplotbg} function, you need to run your plot function
51 | again \emph{with the added argument} \code{add=TRUE}
52 | so that your plot can be overlaid on top of the background.
53 | }
54 | \author{
55 | Caitlin Collins \email{caitiecollins@gmail.com}
56 | }
57 | 


--------------------------------------------------------------------------------
/man/heatmap.DNAbin.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/heatmap.DNAbin.R
 3 | \name{heatmap.DNAbin}
 4 | \alias{heatmap.DNAbin}
 5 | \title{Short one-phrase description.}
 6 | \usage{
 7 | heatmap.DNAbin(dna, dist.dna.model = "JC69")
 8 | }
 9 | \arguments{
10 | \item{dna}{A DNAbin object.}
11 | 
12 | \item{dist.dna.model}{A character string specifying the type of model to use in
13 | calculating the genetic distance between individual genomes (see ?dist.dna).}
14 | }
15 | \description{
16 | Longer proper description of function...
17 | }
18 | \author{
19 | Caitlin Collins \email{caitiecollins@gmail.com}
20 | }
21 | 


--------------------------------------------------------------------------------
/man/keepFirstN.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{keepFirstN}
 4 | \alias{keepFirstN}
 5 | \title{Truncate to keep only the \emph{first} N characters.}
 6 | \usage{
 7 | keepFirstN(x, n)
 8 | }
 9 | \arguments{
10 | \item{x}{A vector whose element(s) will be truncated.}
11 | 
12 | \item{n}{An integer specifying the number of characters to \emph{keep}.}
13 | }
14 | \description{
15 | Truncate an element, or each element of a vector, by
16 | removing all but the first N characters of each element.
17 | }
18 | \author{
19 | Caitlin Collins \email{caitiecollins@gmail.com}
20 | }
21 | 


--------------------------------------------------------------------------------
/man/keepLastN.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{keepLastN}
 4 | \alias{keepLastN}
 5 | \title{Truncate to keep only the \emph{last} N characters.}
 6 | \usage{
 7 | keepLastN(x, n)
 8 | }
 9 | \arguments{
10 | \item{x}{A vector whose element(s) will be truncated.}
11 | 
12 | \item{n}{An integer specifying the number of characters to \emph{keep}.}
13 | }
14 | \description{
15 | Truncate an element, or each element of a vector, by
16 | removing all but the last N characters of each element.
17 | }
18 | \author{
19 | Caitlin Collins \email{caitiecollins@gmail.com}
20 | }
21 | 


--------------------------------------------------------------------------------
/man/manhattan.plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.sig.snps.R
 3 | \name{manhattan.plot}
 4 | \alias{manhattan.plot}
 5 | \title{Manhattan Plot}
 6 | \usage{
 7 | manhattan.plot(
 8 |   p.vals,
 9 |   x = c(1:length(p.vals)),
10 |   col = "funky",
11 |   transp = 0.25,
12 |   sig.thresh = NULL,
13 |   thresh.col = "red",
14 |   snps.assoc = NULL,
15 |   snps.assoc.col = "red",
16 |   jitter.amount = 1e-05,
17 |   min.p = NULL,
18 |   log10 = FALSE,
19 |   ylab = NULL,
20 |   main.title = "Manhattan plot"
21 | )
22 | }
23 | \arguments{
24 | \item{p.vals}{A numeric vector containing p-values or association score values for each genetic locus.}
25 | }
26 | \description{
27 | Generate a Manhattan plot showing the association score values or p-values (y-axis)
28 | for each locus (x-axis) tested by an association test.
29 | }
30 | \author{
31 | Caitlin Collins \email{caitiecollins@gmail.com}
32 | }
33 | 


--------------------------------------------------------------------------------
/man/memfree.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{memfree}
 4 | \alias{memfree}
 5 | \title{Get the current amount of available memory.}
 6 | \usage{
 7 | memfree(OS = NULL)
 8 | }
 9 | \arguments{
10 | \item{OS}{A character string indicating the operating system of the machine in question.
11 | Can be one of "Windows", "Mac" (or "Darwin"), or "Linux". If OS is NULL (the default),
12 | OS will be set to Sys.info()["sysname"].}
13 | }
14 | \description{
15 | Function to determine how much memory (in GB) is currently available for use
16 | on your PC.
17 | }
18 | \author{
19 | Caitlin Collins \email{caitiecollins@gmail.com}
20 | }
21 | 


--------------------------------------------------------------------------------
/man/pair.tests.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pair.tests.R
 3 | \name{pair.tests}
 4 | \alias{pair.tests}
 5 | \title{Pairwise tests for categorical phenotypes}
 6 | \usage{
 7 | pair.tests(x, y, z, method = "bonf", digits = 3)
 8 | }
 9 | \arguments{
10 | \item{x}{A contingency table (snps[,i] x phen) for score 1 (\code{terminal.test} 
11 | with \code{correct.prop = TRUE}, \code{categorical = TRUE}).}
12 | 
13 | \item{y}{A vector of values containing pairwise score 2 (\code{simultaneous.test} 
14 | with \code{categorical = TRUE}) results for snps[,i].}
15 | 
16 | \item{z}{A contingency table (snps.rec[,i] x phen.rec) for score 3 (\code{subsequent.test} 
17 | with \code{correct.prop = TRUE}, \code{categorical = TRUE}).}
18 | }
19 | \description{
20 | Internal function to calculate treeWAS 
21 | terminal, simultaneous, subsequent tests, 
22 | and chi-squared p-values for a given snp across pairs of
23 | phenotype levels.
24 | }
25 | \examples{
26 | ## Example ##
27 | \dontrun{
28 | ## basic use of fn
29 | out <- pair.tests(x, y, z)
30 | }
31 | 
32 | }
33 | \author{
34 | Caitlin Collins \email{caitiecollins@gmail.com}
35 | }
36 | 


--------------------------------------------------------------------------------
/man/phen.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{phen}
 5 | \alias{phen}
 6 | \title{A binary phenotype.}
 7 | \format{
 8 | A named vector of length 100.
 9 | }
10 | \usage{
11 | data(phen)
12 | }
13 | \description{
14 | This vector specifies the phenotype of each individual.
15 | In this case, the phenotype is a binary variable.
16 | The phenotypic vector is encoded as a factor
17 | with two possible phenotypic states, "A" and "B",
18 | which may be represented by the numeric values 1 and 2 (as in \code{str(phen)}).
19 | }
20 | \details{
21 | Each individual in the sample is represented by a unique identifier (name)
22 | which corresponds to the name of one element of the phenotypic vector.
23 | Each element of the phenotypic vector gives the phenotypic value of the named individual.
24 | }
25 | \author{
26 | Caitlin Collins \email{caitiecollins@gmail.com}
27 | }
28 | \keyword{data}
29 | \keyword{datasets}
30 | 


--------------------------------------------------------------------------------
/man/phen.cont.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{phen.cont}
 5 | \alias{phen.cont}
 6 | \title{A continuous phenotype.}
 7 | \format{
 8 | A named vector of length 533.
 9 | }
10 | \usage{
11 | data(phen.cont)
12 | }
13 | \description{
14 | This vector specifies the phenotype of each individual.
15 | In this case, the phenotype is a continuous numeric value.
16 | }
17 | \details{
18 | Each individual in the sample is represented by a unique identifier (name)
19 | which corresponds to the name of one element of the phenotypic vector.
20 | Each element of the phenotypic vector gives the phenotypic value of the named individual.
21 | 
22 | Note that, due to some skew in the distribution of this continuous variable,
23 | it may be useful to transform the phenotype by rank prior to analysis by treeWAS,
24 | as in \code{data(phen.cont.rank)} (see the treeWAS vignette).
25 | % (see \code{vignette("treeWAS")}).
26 | }
27 | \author{
28 | Caitlin Collins \email{caitiecollins@gmail.com}
29 | }
30 | \keyword{data}
31 | \keyword{datasets}
32 | 


--------------------------------------------------------------------------------
/man/phen.cont.rank.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{phen.cont.rank}
 5 | \alias{phen.cont.rank}
 6 | \title{A rank-transformed continuous phenotype.}
 7 | \format{
 8 | A named vector of length 533.
 9 | }
10 | \usage{
11 | data(phen.cont.rank)
12 | }
13 | \description{
14 | This vector specifies the phenotype of each individual.
15 | In this case, the phenotype is a rank, that has been derived by
16 | rank-ordering the elements of the original continuous phenotype (\code{data(phen.cont)})
17 | from lowest to highest.
18 | Transforming by rank prior to analysis by treeWAS can be useful
19 | for continuous phenotypic variables that are highly skewed or contain significant outliers
20 | (see the treeWAS vignette).
21 | % (see \code{vignette("treeWAS")}).
22 | }
23 | \details{
24 | Each individual in the sample is represented by a unique identifier (name)
25 | which corresponds to the name of one element of the phenotypic vector.
26 | Each element of the phenotypic vector gives the phenotypic value of the named individual.
27 | }
28 | \author{
29 | Caitlin Collins \email{caitiecollins@gmail.com}
30 | }
31 | \keyword{data}
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/phen.plot.col.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{phen.plot.col}
 5 | \alias{phen.plot.col}
 6 | \title{Phenotypic tree-colouring schemes.}
 7 | \format{
 8 | A list of length 5.
 9 | }
10 | \usage{
11 | data(phen.plot.col)
12 | }
13 | \description{
14 | A list containing the colour values that \code{plot_phen} generates to represent
15 | the states and substitutions of the phenotypic variable (\code{data(phen)})
16 | along the phylogenetic tree (\code{data(tree)}), with \code{plot_phen(tree, phen.nodes=phen)}.
17 | You are unlikely to have to interact with this list,
18 | as the colours are automatically plotted by the \code{plot_phen} function.
19 | }
20 | \details{
21 | The five elements of this list give the colour schemes used to indicate the phenotypic state at:
22 | edge.labels, edges, all.nodes, internal.nodes, and tip.labels.
23 | }
24 | \author{
25 | Caitlin Collins \email{caitiecollins@gmail.com}
26 | }
27 | \keyword{data}
28 | \keyword{datasets}
29 | 


--------------------------------------------------------------------------------
/man/phen.reconstruction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{phen.reconstruction}
 5 | \alias{phen.reconstruction}
 6 | \title{The ancestral state reconstruction of a binary phenotype.}
 7 | \format{
 8 | A named vector of length 199.
 9 | }
10 | \usage{
11 | data(phen.reconstruction)
12 | }
13 | \description{
14 | This vector contains the terminal and ancestral states of a binary phenotypic variable (\code{data(phen)}).
15 | The observed phenotypic states of sampled individuals
16 | (i.e., those represented at the terminal nodes of a phylogenetic tree)
17 | are presented first, in elements 1:N (here 1:100).
18 | The unobserved ancestral states of the phenotype at internal nodes have been
19 | inferred via ancestral state reconstruction, using \code{asr(phen, tree)}.
20 | }
21 | \details{
22 | Like the original phenotypic vector (\code{data(phen)}),
23 | \code{phen.reconstruction} is a binary variable that is encoded as a factor
24 | with two possible phenotypic states, "A" and "B",
25 | which may be represented by the numeric values 1 and 2 (as in \code{str(phen.reconstruction)}).
26 | 
27 | Each individual in the sample is represented by a unique identifier (name)
28 | which corresponds to the name of one element of the phenotypic vector.
29 | (Internal node names have been generated during ancestral state reconstruction.)
30 | Each element of the phenotypic vector gives the phenotypic value of the named individual.
31 | }
32 | \author{
33 | Caitlin Collins \email{caitiecollins@gmail.com}
34 | }
35 | \keyword{data}
36 | \keyword{datasets}
37 | 


--------------------------------------------------------------------------------
/man/phen.sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/phen.sim.R
 3 | \name{phen.sim}
 4 | \alias{phen.sim}
 5 | \title{Short one-phrase description.}
 6 | \usage{
 7 | phen.sim(tree, n.subs = 15, grp.min = 0.2, n.subs.var = TRUE, seed = NULL)
 8 | }
 9 | \arguments{
10 | \item{tree}{A phylo object.}
11 | 
12 | \item{n.subs}{An integer controlling the phenotypic substitution rate (see details).}
13 | 
14 | \item{grp.min}{An optional numeric value < 0.5 specifying the minimum accepted proportion of terminal nodes
15 | to be in the minor phenotypic group. It may be useful to specify a \code{grp.min} of,
16 | for example, 0.2 (the default) to prevent excessive imbalance in the phenotypic group sizes. However,
17 | it is important to note that (at least for the time being) \code{grp.min} values closer to
18 | 0.5 are likely to cause the computational time of \code{phen.sim} to increase substantially,
19 | as the function will run until acceptable group sizes are randomly generated.}
20 | 
21 | \item{seed}{An optional integer used to set the seed and control the pseudo-random process used in
22 | \code{phen.sim}, enabling the repeatable regeneration of identical output.}
23 | }
24 | \description{
25 | The parameter n.subs controls the simulation of the phenotype by specifying
26 | the expected value of the number of phenotypic substitutions to occur on the tree provided.
27 | The true number of phenotypic substitutions is drawn from a Poisson distribution with parameter n.subs.
28 | }
29 | \details{
30 | Longer proper description of function...
31 | }
32 | \examples{
33 | 
34 | ## basic use of fn
35 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
36 | 
37 | ## plot output
38 | plot(tree)
39 | 
40 | }
41 | \author{
42 | Caitlin Collins \email{caitiecollins@gmail.com}
43 | }
44 | 


--------------------------------------------------------------------------------
/man/plot_phen.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/plot.phen.R
  3 | \name{plot_phen}
  4 | \alias{plot_phen}
  5 | \title{Plot the states of a phenotype or genotype along a phylogenetic tree.}
  6 | \usage{
  7 | plot_phen(
  8 |   tree,
  9 |   phen.nodes,
 10 |   snp.nodes = NULL,
 11 |   plot = TRUE,
 12 |   RTL = FALSE,
 13 |   LTR.snp = FALSE,
 14 |   main.title = NULL,
 15 |   align.tip.label = FALSE,
 16 |   show.axis = TRUE,
 17 |   ...
 18 | )
 19 | }
 20 | \arguments{
 21 | \item{tree}{A phylo object.}
 22 | 
 23 | \item{phen.nodes}{A vector containing the phenotypic state of either
 24 | (i) only terminal nodes in tree or
 25 | (ii) all nodes, terminal and internal in tree.}
 26 | 
 27 | \item{snp.nodes}{An optional vector containing the states of
 28 | a second variable (e.g., a genotypic variable) for either
 29 | the terminal nodes or all nodes in the tree.}
 30 | 
 31 | \item{plot}{A logical specifying whether to display a plot
 32 | of the inputted phylogenetic tree with edges coloured to show the
 33 | simulated phenotypic substitution process.}
 34 | 
 35 | \item{RTL}{A logical variable indicating whether to plot the
 36 | first or only tree from right to left (TRUE),
 37 | or left to right (FALSE, the default).}
 38 | 
 39 | \item{LTR.snp}{A logical variable indicating whether to plot the
 40 | optional second tree from left to right (TRUE),
 41 | or right to left (FALSE, the default).}
 42 | 
 43 | \item{main.title}{Either NULL or a character vector specifying a main title for the plot.}
 44 | 
 45 | \item{align.tip.label}{A logical indicating whether to align tip labels with each other (TRUE) or
 46 | to place tip labels at terminal nodes (FALSE, the default).}
 47 | 
 48 | \item{show.axis}{A logical indicating whether to add an axis showing the scale of branch lengths
 49 | at the foot of the plot with \code{axisPhylo} (TRUE, the default) or not (FALSE).}
 50 | }
 51 | \description{
 52 | This function is designed to visualise the reconstructed ancestral states of a variable along a phylogenetic tree.
 53 | It uses colour to represent the states of terminal and internal nodes (if available),
 54 | indicating changes between states by grey branches (except in the case of truly continuous variables).
 55 | }
 56 | \details{
 57 | Ancestral states must be inferred in advance, for example, using function \code{asr}.
 58 | States are then shown in the colour of terminal node labels and the colour of the edges of the tree.
 59 | If only terminal states are available, these can be plotted along the tips of the tree.
 60 | If desired, a second variable, for example, a particular SNP or genetic locus, can be shown along
 61 | a second phylogeny. In this case, the second variable will be shown on a topologically identical tree,
 62 | which will be plotted from right to left, mirroring the first tree along the vertical axis of the plotting window.
 63 | The \code{RTL} and \code{LTR.snp} arguments can be used to change the
 64 | orientation/direction of the first and/or second tree.
 65 | }
 66 | \examples{
 67 | 
 68 | ## Example 1 ##
 69 | \dontrun{
 70 | ## load phylogenetic and phenotypic data:
 71 | data(tree)
 72 | data(phen)
 73 | 
 74 | ## reconstruct phenotypic ancestral states:
 75 | phen.rec <- asr(var=phen, tree=tree, type="parsimony", method="discrete")
 76 | 
 77 | ## plot phenotype along tree:
 78 | plot_phen(tree, phen.nodes=phen.rec)
 79 | }
 80 | 
 81 | 
 82 | ## Example 2 ##
 83 | \dontrun{
 84 | ## load phylogenetic and phenotypic data:
 85 | data(tree)
 86 | data(phen)
 87 | 
 88 | ## load genotypic data:
 89 | data(snps)
 90 | 
 91 | ## reconstruct phenotypic ancestral states:
 92 | phen.rec <- asr(var=phen, tree=tree, type="parsimony", method="discrete")
 93 | 
 94 | ## reconstruct genotypic ancestral states:
 95 | snps.rec <- asr(var=snps, tree=tree, type="parsimony", method="discrete")
 96 | 
 97 | ## plot both the phenotype and a genotype along tree:
 98 | plot_phen(tree, phen.nodes=phen.rec, snp.nodes=snps.rec[,1])
 99 | }
100 | 
101 | }
102 | \author{
103 | Caitlin Collins \email{caitiecollins@gmail.com}
104 | }
105 | 


--------------------------------------------------------------------------------
/man/plot_prob_phen.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fwd.plot.prob.phen.R
 3 | \name{plot_prob_phen}
 4 | \alias{plot_prob_phen}
 5 | \title{Plot the probability of association, given \code{p} and \code{n.snps.assoc}.}
 6 | \usage{
 7 | plot_prob_phen(p = 0.5, n.snps.assoc = 10)
 8 | }
 9 | \arguments{
10 | \item{p}{A numeric value indicating the probability of substitution, at each site, along the tree.}
11 | 
12 | \item{n.snps.assoc}{An integer specifying the number of genetic loci that are associated with the phenotype.}
13 | }
14 | \description{
15 | [*For use with the 'fwd.-.sim' functions:*]
16 | Plot the cumulative probability of association (Pr(phen=1)), with a given value of \code{p},
17 | as the number of associated sites (SNPi=1) increases from i=0 to i=\code{n.snps.assoc}.
18 | }
19 | \examples{
20 | \dontrun{
21 | ## basic use of fn ##
22 | ## compare probability of having phenotype with 10 SNPs at varying p:
23 | plot_prob_phen(p=0.8, n.snps.assoc=10)
24 | plot_prob_phen(p=0.5, n.snps.assoc=10)
25 | plot_prob_phen(p=0.2, n.snps.assoc=10)
26 | }
27 | }
28 | \author{
29 | Caitlin Collins \email{caitiecollins@gmail.com}
30 | }
31 | 


--------------------------------------------------------------------------------
/man/plot_sig_snps.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.sig.snps.R
 3 | \name{plot_sig_snps}
 4 | \alias{plot_sig_snps}
 5 | \title{Plot null distribution and significant sites.}
 6 | \usage{
 7 | plot_sig_snps(
 8 |   corr.dat,
 9 |   corr.sim,
10 |   corr.sim.subset = NULL,
11 |   sig.corrs = NULL,
12 |   sig.snps = NULL,
13 |   sig.thresh = NULL,
14 |   test = NULL,
15 |   sig.snps.col = "blue",
16 |   hist.col = rgb(0, 0, 1, 0.5),
17 |   hist.subset.col = rgb(1, 0, 0, 0.5),
18 |   thresh.col = "seasun",
19 |   snps.assoc = NULL,
20 |   snps.assoc.col = "red",
21 |   bg = "lightgray",
22 |   grid = TRUE,
23 |   freq = FALSE,
24 |   plot.null.dist = TRUE,
25 |   plot.dist = FALSE,
26 |   main.title = TRUE,
27 |   ...
28 | )
29 | }
30 | \arguments{
31 | \item{arg}{Description.}
32 | }
33 | \description{
34 | Plot a histogram of the null distribution,
35 | indicating the significance threshold and
36 | the names and association scores of significant sites.
37 | }
38 | \author{
39 | Caitlin Collins \email{caitiecollins@gmail.com}
40 | }
41 | 


--------------------------------------------------------------------------------
/man/print.treeWAS.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/treeWAS.R
 3 | \name{print.treeWAS}
 4 | \alias{print.treeWAS}
 5 | \title{Print \code{treeWAS} output.}
 6 | \usage{
 7 | \method{print}{treeWAS}(x, sort.by.p = FALSE, digits = 3)
 8 | }
 9 | \arguments{
10 | \item{x}{The output returned by \code{treeWAS}.}
11 | 
12 | \item{sort.by.p}{A logical indicating whether to sort the results by decreasing p-value (\code{TRUE})
13 | or by locus (\code{FALSE}, the default).}
14 | }
15 | \description{
16 | Print the results of \code{treeWAS}, excluding longer data elements within the output.
17 | }
18 | \author{
19 | Caitlin Collins \email{caitiecollins@gmail.com}
20 | }
21 | 


--------------------------------------------------------------------------------
/man/read.CFML.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/readCFML.R
 3 | \name{read.CFML}
 4 | \alias{read.CFML}
 5 | \title{Convert ClonalFrameML output.}
 6 | \usage{
 7 | read.CFML(prefix, tree = NULL, plot = TRUE, suff.length = 2)
 8 | }
 9 | \arguments{
10 | \item{prefix}{A character string containing the prefix of all file names to be read in.}
11 | }
12 | \value{
13 | read.CFML returns a list containing:
14 | (i) \code{tree}: The phylogenetic tree.
15 | (ii) \code{snps}: The binary genetic data matrix of polymorphic loci.
16 | (iii) \code{snps.rec}: The genetic data reconstruction matrix.
17 | (iv) \code{seqs}: The genetic data sequences (polymorphic loci only), a \code{DNAbin} object.
18 | (v) \code{index}: The index vector, indicating for each column in \code{seqs}
19 | the unique polymorphic column pattern to which it corresponds (0 = non-polymorphic).
20 | (vi) \code{n.subs}: The distribution of the number of substitutions per site.
21 | Note that all genetic data elements (ii - iv) are returned in expanded form; that is,
22 | they contain both unique and duplicate column patterns for all polymorphic loci as indicated in the \code{index} vector.
23 | }
24 | \description{
25 | Convert the output of ClonalFrameML into a form usable within \code{treeWAS}.
26 | }
27 | \details{
28 | The \code{prefix} must be the prefix to three files ending in:
29 | (i) "labelled_tree.newick", (ii) "ML_sequence.fasta", (iii) "position_cross_reference.txt".
30 | }
31 | \examples{
32 | ## Example ##
33 | \dontrun{
34 | ## basic use of fn
35 | out <- read.CFML(prefix="./filename_")
36 | }
37 | 
38 | }
39 | \author{
40 | Caitlin Collins \email{caitiecollins@gmail.com}
41 | }
42 | 


--------------------------------------------------------------------------------
/man/removeFirstN.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{removeFirstN}
 4 | \alias{removeFirstN}
 5 | \title{Truncate to remove all of the \emph{first} N characters.}
 6 | \usage{
 7 | removeFirstN(x, n)
 8 | }
 9 | \arguments{
10 | \item{x}{A vector whose element(s) will be truncated.}
11 | 
12 | \item{n}{An integer specifying the number of characters to \emph{remove}.}
13 | }
14 | \description{
15 | Truncate an element, or each element of a vector, by
16 | removing the first N characters of each element.
17 | }
18 | \author{
19 | Caitlin Collins \email{caitiecollins@gmail.com}
20 | }
21 | 


--------------------------------------------------------------------------------
/man/removeLastN.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{removeLastN}
 4 | \alias{removeLastN}
 5 | \title{Truncate to remove all of the \emph{last} N characters.}
 6 | \usage{
 7 | removeLastN(x, n)
 8 | }
 9 | \arguments{
10 | \item{x}{A vector whose element(s) will be truncated.}
11 | 
12 | \item{n}{An integer specifying the number of characters to \emph{remove}.}
13 | }
14 | \description{
15 | Truncate an element, or each element of a vector, by
16 | removing the last N characters of each element.
17 | }
18 | \author{
19 | Caitlin Collins \email{caitiecollins@gmail.com}
20 | }
21 | 


--------------------------------------------------------------------------------
/man/selectBiallelicSNP.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{selectBiallelicSNP}
 4 | \alias{selectBiallelicSNP}
 5 | \title{Short one-phrase description.}
 6 | \usage{
 7 | selectBiallelicSNP(x, DNA = TRUE)
 8 | }
 9 | \arguments{
10 | \item{x}{A character vector of length 1 containing a nucleotide to be converted.}
11 | 
12 | \item{DNA}{logical; if TRUE (default), uses DNA bases (ACGT), if FALSE, uses RNA bases (ACGU).}
13 | }
14 | \description{
15 | Longer proper description of function...
16 | }
17 | \author{
18 | Caitlin Collins \email{caitiecollins@gmail.com}
19 | }
20 | 


--------------------------------------------------------------------------------
/man/set.args.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{set.args}
 4 | \alias{set.args}
 5 | \title{Set a list of arguments.}
 6 | \usage{
 7 | set.args(args, envir = sys.frame(which = 0L))
 8 | }
 9 | \arguments{
10 | \item{args}{A named list of arguments.}
11 | 
12 | \item{envir}{The environment in which these arguments will be set.}
13 | }
14 | \description{
15 | Function to set a list of arguments without having to remove commas.
16 | Useful for troubleshooting. For example, if attempting to run a function
17 | (particularly one with many arguments) line by line,
18 | \code{set.args} can be used to set a list of arguments in one go, by copying a
19 | comma-separated set of arguments from an existing function call or a new call to \code{args(fn)}.
20 | }
21 | \details{
22 | Please note that unless the \code{envir} argument is changed from its default (\code{sys.frame}),
23 | any arguments set with \code{set.args} will \emph{over-ride} any values currently assigned to those names.
24 | }
25 | \author{
26 | Caitlin Collins \email{caitiecollins@gmail.com}
27 | }
28 | 


--------------------------------------------------------------------------------
/man/simTest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simTest.R
 3 | \name{simTest}
 4 | \alias{simTest}
 5 | \title{Simulation Testing.}
 6 | \usage{
 7 | simTest(
 8 |   set.number = 3,
 9 |   n.reps = 1,
10 |   set.seed.as = "file.number",
11 |   working.dir = "~/",
12 |   from.file = FALSE,
13 |   file.n = NULL,
14 |   Windows = FALSE,
15 |   cluster = FALSE,
16 |   n.ind = 100,
17 |   n.snps = 10000,
18 |   n.subs = treeWAS::dist_0.01,
19 |   n.phen.subs = 15,
20 |   n.snps.assoc = 10,
21 |   assoc.prob = 90,
22 |   grp.min = 0.25,
23 |   s = 20,
24 |   af = 10,
25 |   coaltree = TRUE,
26 |   p.value = 0.01,
27 |   p.value.correct = "bonf",
28 |   p.value.by = "count",
29 |   sim.n.snps = 1e+05,
30 |   treeWAS.test = c("terminal", "simultaneous", "subsequent"),
31 |   snps.reconstruction = "parsimony",
32 |   phen.reconstruction = "parsimony"
33 | )
34 | }
35 | \arguments{
36 | \item{test}{A character string or vector containing one or more of the following available tests of association:
37 | "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the first three tests are run.
38 | See details for more information on what these tests do and when they may be appropriate.}
39 | }
40 | \description{
41 | Generic simulation-testing function used to validate treeWAS performance on simulated datasets. Not designed for public use!
42 | }
43 | \author{
44 | Caitlin Collins \email{caitiecollins@gmail.com}
45 | }
46 | 


--------------------------------------------------------------------------------
/man/simultaneous.test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simultaneous.test.R
 3 | \name{simultaneous.test}
 4 | \alias{simultaneous.test}
 5 | \title{Simultaneous test}
 6 | \usage{
 7 | simultaneous.test(
 8 |   snps.reconstruction,
 9 |   phen.reconstruction,
10 |   tree,
11 |   categorical = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{snps.reconstruction}{A matrix containing the terminal and reconstructed
16 | ancestral states of SNPs for all nodes in the tree.}
17 | 
18 | \item{phen.reconstruction}{A vector containing the terminal and reconstructed
19 | ancestral states of the phenotype for all nodes in the tree.}
20 | 
21 | \item{tree}{A phylo object containing the tree representing the ancestral relationships
22 | between the individuals for which snps and phen are known.}
23 | }
24 | \description{
25 | Calculates treeWAS score 2, the simultaneous test, as the number of 
26 | substitutions or changes in genotype (\code{snps.reconstruction}) and phenotype 
27 | (\code{phen.reconstruction}) that occur simultaneously on the same branches of the tree.
28 | }
29 | \author{
30 | Caitlin Collins \email{caitiecollins@gmail.com}
31 | }
32 | 


--------------------------------------------------------------------------------
/man/simultaneous.test.epi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simultaneous.test.epi.R
 3 | \name{simultaneous.test.epi}
 4 | \alias{simultaneous.test.epi}
 5 | \title{Test for association between genetic loci with Score 2.}
 6 | \usage{
 7 | simultaneous.test.epi(snps.reconstruction, tree, snps.subset = NULL)
 8 | }
 9 | \arguments{
10 | \item{snps.reconstruction}{A matrix containing the terminal and reconstructed
11 | ancestral states of SNPs for all nodes in the tree.}
12 | 
13 | \item{tree}{A phylo object containing the tree representing the ancestral relationships
14 | between the individuals for which snps and phen are known.}
15 | 
16 | \item{snps.subset}{An optional vector (see details); else, NULL. 
17 | The snps.subset vector can be a character vector, containing a subset of colnames(snps.rec), 
18 | a logical vector, using TRUE or FALSE to indicate which columns are to be retained and excluded,
19 | or an integer vector, specifying the column indices to be retained.}
20 | }
21 | \description{
22 | [*\emph{A work in progress; not currently integrated into treeWAS:}*]
23 | Use the simultaneous.test (Score 2) to test for associations between genetic loci, 
24 | which may indicate an epistatic interaction.
25 | This function can be used either to test 
26 | for pairwise association between all pairs of genetic loci
27 | or for associations between a subset of snps and all other snps 
28 | (recommended for large datasets; see details).
29 | }
30 | \details{
31 | The number of pairwise tests between all pairs of snps 
32 | grows rapidly as the number of snps columns increases. 
33 | As such, for datasets where ncol(snps.reconstruction) is large, we recommend that
34 | the snps.subset argument is used to reduce the number of tests, by
35 | indicating which snps to test for association with all other snps. 
36 | The snps.subset index can be used to select any subset of snps of interest. 
37 | For example, one may wish to test for interactions between all snps and a subset of snps that 
38 | had been deemed significantly associated with a particular phenotype in a previous run of treeWAS.
39 | }
40 | \author{
41 | Caitlin Collins \email{caitiecollins@gmail.com}
42 | }
43 | 


--------------------------------------------------------------------------------
/man/snp.sim.Q.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/snp.sim.Q.R, R/snp.sim.Q_old.R
  3 | \name{snp.sim.Q}
  4 | \alias{snp.sim.Q}
  5 | \title{Alternative SNPs simulation function.}
  6 | \usage{
  7 | snp.sim.Q(
  8 |   n.snps = 10000,
  9 |   n.subs = 1,
 10 |   snp.root = NULL,
 11 |   n.snps.assoc = 10,
 12 |   assoc.prob = 100,
 13 |   Q = matrix(c(2, 0.75, 0.75, 1, 3, 0.5, 0.25, 3, 3, 0.25, 0.5, 3, 1, 0.75, 0.75, 2),
 14 |     nrow = 4, byrow = T, dimnames = rep(list(c("0|0", "0|1", "1|0", "1|1")), 2)),
 15 |   tree = coalescent.tree.sim(100),
 16 |   n.phen.subs = 15,
 17 |   phen.loci = NULL,
 18 |   heatmap = FALSE,
 19 |   reconstruct = FALSE,
 20 |   dist.dna.model = "JC69",
 21 |   grp.min = 0.25,
 22 |   row.names = NULL,
 23 |   set = 3,
 24 |   seed = 1
 25 | )
 26 | 
 27 | snp.sim.Q(
 28 |   n.snps = 10000,
 29 |   n.subs = 1,
 30 |   snp.root = NULL,
 31 |   n.snps.assoc = 10,
 32 |   assoc.prob = 100,
 33 |   Q = matrix(c(2, 0.75, 0.75, 1, 3, 0.5, 0.25, 3, 3, 0.25, 0.5, 3, 1, 0.75, 0.75, 2),
 34 |     nrow = 4, byrow = T, dimnames = rep(list(c("0|0", "0|1", "1|0", "1|1")), 2)),
 35 |   tree = coalescent.tree.sim(100),
 36 |   n.phen.subs = 15,
 37 |   phen.loci = NULL,
 38 |   heatmap = FALSE,
 39 |   reconstruct = FALSE,
 40 |   dist.dna.model = "JC69",
 41 |   grp.min = 0.25,
 42 |   row.names = NULL,
 43 |   set = 3,
 44 |   seed = 1
 45 | )
 46 | }
 47 | \arguments{
 48 | \item{n.snps}{An integer specifying the number of snps columns to be simulated.}
 49 | 
 50 | \item{tree}{A \code{phylo} object containing the phylogenetic tree; or, a character string,
 51 | one of \code{"NJ"}, \code{"BIONJ"} (the default), or \code{"parsimony"};
 52 | or, if NAs are present in the distance matrix, one of: \code{"NJ*"} or \code{"BIONJ*"},
 53 | specifying the method of phylogenetic reconstruction.}
 54 | 
 55 | \item{heatmap}{A logical indicating whether to produce a heatmap of the genetic distance
 56 | between the simulated genomes of the n.ind individuals.}
 57 | 
 58 | \item{reconstruct}{Either a logical indicating whether to attempt to reconstruct
 59 | a phylogenetic tree using the simulated genetic data, or one of c("UPGMA", "nj", "ml")
 60 | to specify that tree reconstruction is desired by one of these three methods
 61 | (Unweighted Pair Group Method with Arithmetic Mean, Neighbour-Joining, Maximum-Likelihood).}
 62 | 
 63 | \item{dist.dna.model}{A character string specifying the type of model to use in reconstructing the phylogenetic tree for
 64 | calculating the genetic distance between individual genomes, only used if \code{tree} is
 65 | a character string (see ?dist.dna).}
 66 | 
 67 | \item{grp.min}{(Not yet (re-)implemented in this function.)
 68 | An optional number between 0.1 and 0.9 to control the proportional size of the smaller phenotypic group.}
 69 | 
 70 | \item{row.names}{An optional vector containing row names for the individuals to be simulated.}
 71 | 
 72 | \item{seed}{An optional integer to control the pseudo-randomisation process and allow for identical repeat runs of the function;
 73 | else \code{NULL}.}
 74 | 
 75 | \item{phen.reconstruction}{Either a character string specifying \code{"parsimony"} (the default) or \code{"ML"} (maximum likelihood)
 76 | for the ancestral state reconstruction of the phenotypic variable,
 77 | or a vector containing this reconstruction if it has been performed elsewhere.}
 78 | 
 79 | \item{s}{If \code{set} is 3, the \code{s} parameter controls a baseline number of substitutions to be
 80 | experienced by the phenotype and associated loci: by default, 20.}
 81 | 
 82 | \item{af}{If \code{set} is 3, the \code{af} parameter provides an association factor,
 83 | controlling the preference for association over non-association at associated loci:  by default, 10 (for a 10x preference).}
 84 | 
 85 | \item{plot}{A logical indicating whether to generate a plot of the phylogenetic tree (\code{TRUE}) or not (\code{FALSE}, the default).}
 86 | }
 87 | \description{
 88 | Currently under development. Please use the regular snp.sim function to simulate genetic data.
 89 | 
 90 | NOT currently in use. Please use the regular snp.sim function to simulate genetic data.
 91 | }
 92 | \examples{
 93 | ## Example ##
 94 | 
 95 | ## Example ##
 96 | 
 97 | }
 98 | \author{
 99 | Caitlin Collins \email{caitiecollins@gmail.com}
100 | }
101 | 


--------------------------------------------------------------------------------
/man/snp.sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snp.sim.R
 3 | \name{snp.sim}
 4 | \alias{snp.sim}
 5 | \title{Simulate genetic data (SNPs).}
 6 | \usage{
 7 | snp.sim(
 8 |   n.snps = 10000,
 9 |   n.subs = 1,
10 |   snp.root = NULL,
11 |   n.snps.assoc = 0,
12 |   assoc.prob = 100,
13 |   tree = coalescent.tree.sim(100),
14 |   phen.loci = NULL,
15 |   heatmap = FALSE,
16 |   reconstruct = FALSE,
17 |   dist.dna.model = "JC69",
18 |   row.names = NULL,
19 |   set = NULL,
20 |   seed = 1
21 | )
22 | }
23 | \arguments{
24 | \item{n.snps}{An integer specifying the number of genetic loci to be simulated.}
25 | }
26 | \description{
27 | Simulates genetic data (SNPs) at the requested number of genetic loci (\code{n.snps}).
28 | }
29 | \examples{
30 | ## Example ##
31 | 
32 | }
33 | \author{
34 | Caitlin Collins \email{caitiecollins@gmail.com}
35 | }
36 | 


--------------------------------------------------------------------------------
/man/snps.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{snps}
 5 | \alias{snps}
 6 | \title{A genetic data matrix.}
 7 | \format{
 8 | A binary matrix with 100 rows and 20,003 columns.
 9 | }
10 | \usage{
11 | data(snps)
12 | }
13 | \description{
14 | This binary matrix contains the allelic states of genetic variables,
15 | typically single-nucleotide polymorphisms (SNPs) (or the presence/absence states of accessory genes),
16 | showing individuals in the rows and genetic loci in the columns.
17 | }
18 | \details{
19 | Each individual in the sample is represented by a unique identifier (name)
20 | which corresponds to the name of one row of the snps matrix.
21 | Each genetic locus is also required to have a unique name.
22 | 
23 | In this \code{snps} matrix, redundant columns are present for biallelic loci,
24 | denoting the state of the second allele as the inverse of the previous column
25 | (e.g., compare locus 1.g and locus 1.a).
26 | These biallelic sites can be condensed into a more efficient binary form
27 | by using \code{get.binary.snps(snps)}
28 | (see the treeWAS vignette).
29 | % (see vignette("treeWAS")).
30 | }
31 | \author{
32 | Caitlin Collins \email{caitiecollins@gmail.com}
33 | }
34 | \keyword{data}
35 | \keyword{datasets}
36 | 


--------------------------------------------------------------------------------
/man/snps.assoc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{snps.assoc}
 5 | \alias{snps.assoc}
 6 | \title{The phenotypically-associated sites in the \code{snps} matrix.}
 7 | \format{
 8 | A vector of length 10.
 9 | }
10 | \usage{
11 | data(snps.assoc)
12 | }
13 | \description{
14 | This vector specifies the identities (names) of the loci in the genetic data matrix (see: \code{data(snps)})
15 | that have been simulated along the phylogenetic tree (see: \code{data(tree)})
16 | to be in statistical association with the phenotype (see: \code{data(phen)}).
17 | Comparing this vector of snps column names to the set of snps loci identified by treeWAS
18 | allows us to evaluate the performance of the treeWAS GWAS method.
19 | After applying treeWAS to the components of this dataset,
20 | using: \code{treeWAS(snps, phen, tree)},
21 | we can assess the ability of treeWAS to recover these "known" associated sites
22 | via any of its three association scores
23 | (see the treeWAS vignette).
24 | % (see vignette("treeWAS")).
25 | }
26 | \author{
27 | Caitlin Collins \email{caitiecollins@gmail.com}
28 | }
29 | \keyword{data}
30 | \keyword{datasets}
31 | 


--------------------------------------------------------------------------------
/man/snps.reconstruction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{snps.reconstruction}
 5 | \alias{snps.reconstruction}
 6 | \title{The ancestral state reconstruction of a genetic data matrix.}
 7 | \format{
 8 | A binary matrix with 199 rows and 20,003 columns.
 9 | }
10 | \usage{
11 | data(snps.reconstruction)
12 | }
13 | \description{
14 | This binary matrix contains the terminal and ancestral allelic states of a set of genetic variables
15 | (for the original genetic data matrix, see: \code{data(snps)}),
16 | showing individuals in the rows and genetic loci in the columns.
17 | The observed genotypic states of sampled individuals
18 | (i.e., those represented at the terminal nodes of a phylogenetic tree)
19 | are presented first, in rows 1:N (here, 1:100).
20 | These rows of the matrix are identical to the input \code{snps} matrix (see: \code{data(snps)}).
21 | The unobserved ancestral states of the genotype at internal nodes have been
22 | inferred via ancestral state reconstruction, using \code{asr(snps, tree)}.
23 | }
24 | \details{
25 | Each individual in the sample is represented by a unique identifier (name)
26 | which corresponds to the name of one row of the snps matrix.
27 | (Internal node names have been generated during ancestral state reconstruction.)
28 | Each genetic locus is also required to have a unique name.
29 | 
30 | In this \code{snps.reconstruction} matrix, redundant columns are present for biallelic loci,
31 | denoting the state of the second allele as the inverse of the previous column
32 | (e.g., compare locus 1.g and locus 1.a).
33 | These biallelic sites can be condensed into a more efficient binary form
34 | by using \code{get.binary.snps(snps)}
35 | (see the treeWAS vignette).
36 | % (see vignette("treeWAS")).
37 | }
38 | \author{
39 | Caitlin Collins \email{caitiecollins@gmail.com}
40 | }
41 | \keyword{data}
42 | \keyword{datasets}
43 | 


--------------------------------------------------------------------------------
/man/subsequent.test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/subsequent.test.R
 3 | \name{subsequent.test}
 4 | \alias{subsequent.test}
 5 | \title{Subsequent test}
 6 | \usage{
 7 | subsequent.test(
 8 |   snps.reconstruction,
 9 |   phen.reconstruction,
10 |   tree,
11 |   correct.prop = FALSE,
12 |   categorical = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{tree}{A phylo object.}
17 | }
18 | \description{
19 | Calculates treeWAS score 3, the subsequent test.
20 | }
21 | \examples{
22 | 
23 | ## basic use of fn
24 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
25 | 
26 | }
27 | \author{
28 | Caitlin Collins \email{caitiecollins@gmail.com}
29 | }
30 | 


--------------------------------------------------------------------------------
/man/table.matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{table.matrix}
 4 | \alias{table.matrix}
 5 | \title{Cross-tabulate the rows or columns of a matrix.}
 6 | \usage{
 7 | table.matrix(data, MARGIN = 1)
 8 | }
 9 | \arguments{
10 | \item{data}{A matrix or data.frame, potentially containing
11 | non-unique patterns in its rows or columns.}
12 | 
13 | \item{MARGIN}{A single integer specifying the array margin to be held fixed.
14 | (To get unique \emph{rows}, select \code{MARGIN} = 1;
15 | for unique \emph{columns}, select \code{MARGIN} = 2.)}
16 | }
17 | \value{
18 | A list with the following elements:
19 | \describe{
20 |    \item{\code{table}}{A contingency table of the counts of the
21 |          number of occurrences of each unique row in the matrix.}
22 |    \item{\code{index}}{An index vector containing the indices (row numbers),
23 |          in a matrix composed only of unique rows,
24 |          to which each row in the original matrix maps.}
25 |    \item{\code{unique.data}}{A new matrix
26 |          containing only the unique rows of the input matrix.}
27 | }
28 | }
29 | \description{
30 | A version of the base \code{table} function designed for matrices.
31 | Taking a matrix as input, \code{table.matrix} returns a contingency table,
32 | index vector, and unique matrix.
33 | }
34 | \details{
35 | To apply this function to the \emph{columns} of a matrix, simply
36 | transpose the matrix before executing the command, as in:
37 | \code{table.matrix(t(data))}.
38 | }
39 | \examples{
40 | \dontrun{
41 | ## load example data:
42 | data("snps.ace")
43 | x <- snps.ace
44 | 
45 | ## basic use of fn on rows of x:
46 | tab.out <- table.matrix(x)
47 | 
48 | ## apply fn to columns of x:
49 | tab.out <- table.matrix(t(x))
50 | }
51 | 
52 | }
53 | \author{
54 | Caitlin Collins \email{caitiecollins@gmail.com}
55 | }
56 | 


--------------------------------------------------------------------------------
/man/terminal.test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/terminal.test.R
 3 | \name{terminal.test}
 4 | \alias{terminal.test}
 5 | \title{Terminal test}
 6 | \usage{
 7 | terminal.test(snps, phen, correct.prop = FALSE, categorical = FALSE)
 8 | }
 9 | \arguments{
10 | \item{snps}{A matrix containing the states of SNPs (in columns) for all individuals (in rows).}
11 | }
12 | \description{
13 | Calculates treeWAS score 1, the terminal test.
14 | }
15 | \examples{
16 | ## Example ##
17 | \dontrun{
18 | ## basic use of fn
19 | out <- terminal.test(snps, phen)
20 | }
21 | 
22 | }
23 | \author{
24 | Caitlin Collins \email{caitiecollins@gmail.com}
25 | }
26 | 


--------------------------------------------------------------------------------
/man/terminal.test.epi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/terminal.test.epi.R
 3 | \name{terminal.test.epi}
 4 | \alias{terminal.test.epi}
 5 | \title{Test for epistasis between genetic loci with Score 1.}
 6 | \usage{
 7 | terminal.test.epi(snps, snps.subset = NULL)
 8 | }
 9 | \arguments{
10 | \item{snps}{A matrix containing the states of SNPs (in columns) for all individuals (in rows).}
11 | 
12 | \item{snps.subset}{An optional vector (see details); else, NULL. 
13 | The snps.subset vector can be a character vector, containing a subset of colnames(snps), 
14 | a logical vector, using TRUE or FALSE to indicate which columns are to be retained and excluded,
15 | or an integer vector, specifying the column indices to be retained.}
16 | }
17 | \description{
18 | [*\emph{A work in progress; not currently integrated into treeWAS:}*]
19 | Use the terminal.test (Score 1) to test for associations between genetic loci, 
20 | which may indicate an epistatic interaction.
21 | This function can be used either to test 
22 | for pairwise association between all pairs of genetic loci
23 | or for associations between a subset of snps and all other snps 
24 | (recommended for large datasets; see details).
25 | }
26 | \details{
27 | The number of pairwise tests between all pairs of snps 
28 | grows rapidly as the number of snps columns increases. 
29 | As such, for datasets where ncol(snps) is large, we recommend that
30 | the snps.subset argument is used to reduce the number of tests, by
31 | indicating which snps to test for association with all other snps. 
32 | The snps.subset index can be used to select any subset of snps of interest. 
33 | For example, one may wish to test for interactions between all snps and a subset of snps that 
34 | had been deemed significantly associated with a particular phenotype in a previous run of treeWAS.
35 | }
36 | \author{
37 | Caitlin Collins \email{caitiecollins@gmail.com}
38 | }
39 | 


--------------------------------------------------------------------------------
/man/tree.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{tree}
 5 | \alias{tree}
 6 | \title{A phylogenetic tree.}
 7 | \format{
 8 | A phylo object with 100 terminal nodes and 99 internal nodes.
 9 | }
10 | \usage{
11 | data(tree)
12 | }
13 | \description{
14 | This phylogenetic tree is a phylo object (see \code{vignette("Trees", package="phangorn")})
15 | connecting the individuals represented in
16 | the rows of the example genetic data matrix (see: \code{data(snps)})
17 | and the elements of the example phenotypic vector (see: \code{data(phen)}).
18 | }
19 | \details{
20 | In this case, the tree was generated via simulation and used to simulate the genotypic and phenotypic data.
21 | In a typical empirical analysis, however, a phylogenetic tree would represent the inferred
22 | ancestral relationships between individuals, and it would be estimated from the available genetic data.
23 | For example, such a phylogeny could be reconstructed using \code{tree.reconstruct(snps, method="NJ")},
24 | or automatically generated within treeWAS, according to the \code{tree} argument, as in:
25 | \code{treeWAS(snps, phen, tree="NJ")}
26 | (see the treeWAS vignette).
27 | % (see vignette("treeWAS")).
28 | }
29 | \author{
30 | Caitlin Collins \email{caitiecollins@gmail.com}
31 | }
32 | \keyword{data}
33 | \keyword{datasets}
34 | 


--------------------------------------------------------------------------------
/man/tree.reconstruct.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tree.reconstruct.R
 3 | \name{tree.reconstruct}
 4 | \alias{tree.reconstruct}
 5 | \title{Reconstruct a phylogenetic tree.}
 6 | \usage{
 7 | tree.reconstruct(
 8 |   dna,
 9 |   method = c("BIONJ", "NJ", "parsimony", "BIONJ*", "NJ*"),
10 |   dist.dna.model = "JC69",
11 |   plot = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{dna}{A matrix or DNAbin object containing genomes for (only)
16 | the terminal nodes of the tree to be reconstructed.
17 | Individuals should be in the rows and loci in the columns; rows and columns should be labelled.}
18 | 
19 | \item{method}{A character string specifying the method of phylogenetic reconstruction:
20 | one of \code{"NJ"}, \code{"BIONJ"} (the default), or \code{"parsimony"};
21 | or, if NAs are present in the distance matrix, one of: \code{"NJ*"} or \code{"BIONJ*"}.}
22 | 
23 | \item{dist.dna.model}{A character string specifying the type of model to use in
24 | calculating the genetic distance between individual genomes (see ?dist.dna).}
25 | 
26 | \item{plot}{A logical specifying whether to plot the reconstructed phylogenetic tree.}
27 | }
28 | \description{
29 | Reconstructs a phylogenetic tree from the genomes in \code{dna}, using the specified \code{method} of phylogenetic reconstruction.
30 | }
31 | \author{
32 | Caitlin Collins \email{caitiecollins@gmail.com}
33 | }
34 | 


--------------------------------------------------------------------------------
/man/treeWAS.example.out.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{treeWAS.example.out}
 5 | \alias{treeWAS.example.out}
 6 | \title{Example output of treeWAS.}
 7 | \format{
 8 | A treeWAS class object, comprising a list of length 5.
 9 | }
10 | \usage{
11 | data(treeWAS.example.out)
12 | }
13 | \description{
14 | This "treeWAS" class object is a list containing the output of a treeWAS analysis.
15 | This GWAS analysis was performed to identify associations between
16 | loci in the example genetic data matrix (see: \code{data(snps)})
17 | and phenotypic states in the example phenotypic vector (see: \code{data(phen)}),
18 | along the phylogenetic tree (see: \code{data(tree)}).
19 | }
20 | \details{
21 | This \code{treeWAS} output was returned by the function:
22 | \code{treeWAS(snps, phen, tree)}.
23 | treeWAS output contains elements describing
24 | the significant associations identified by each of the
25 | three association scores applied to all genetic loci.
26 | Additional elements of the output return all data that was
27 | used in the GWAS analysis, including both data input to treeWAS
28 | and all relevant data generated by treeWAS.
29 | 
30 | For a detailed description of the elements of this output,
31 | please scroll down to the "Value" section of the \code{treeWAS} function documentation,
32 | which can be accessed with: \code{?treeWAS}.
33 | More information can also be found in the treeWAS vignette.
34 | % (see vignette("treeWAS")).
35 | }
36 | \author{
37 | Caitlin Collins \email{caitiecollins@gmail.com}
38 | }
39 | \keyword{data}
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/man/write.treeWAS.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/treeWAS.R
 3 | \name{write.treeWAS}
 4 | \alias{write.treeWAS}
 5 | \title{Write \code{treeWAS} output to a CSV file.}
 6 | \usage{
 7 | write.treeWAS(x, filename = "./treeWAS_results")
 8 | }
 9 | \arguments{
10 | \item{x}{The output returned by \code{treeWAS}.}
11 | 
12 | \item{filename}{A character string containing the path and filename to which the .csv file will be saved;
13 | by default, \code{filename = "./treeWAS_results"} and so
14 | would be saved to the current working directory.}
15 | }
16 | \description{
17 | Save the results of \code{treeWAS} to a CSV file
18 | as a summary table of significant findings and scores
19 | (excluding longer data elements within the output).
20 | }
21 | \examples{
22 | ## Example ##
23 | \dontrun{
24 | ## Load data:
25 | data(snps)
26 | data(phen)
27 | data(tree)
28 | 
29 | ## Run treeWAS:
30 | out <- treeWAS(snps, phen, tree, seed = 1)
31 | 
32 | ## Save results to home directory:
33 | write.treeWAS(x = out, filename = "~/treeWAS_results")
34 | }
35 | 
36 | }
37 | \author{
38 | Caitlin Collins \email{caitiecollins@gmail.com}
39 | }
40 | 


--------------------------------------------------------------------------------
/treeWAS.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/vignettes/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/.DS_Store


--------------------------------------------------------------------------------
/vignettes/figs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/.DS_Store


--------------------------------------------------------------------------------
/vignettes/figs/Eqn_Legend_genotype.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/Eqn_Legend_genotype.JPG


--------------------------------------------------------------------------------
/vignettes/figs/Eqn_Legend_genotype.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/Eqn_Legend_genotype.pdf


--------------------------------------------------------------------------------
/vignettes/figs/Eqn_Legend_genotype.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/Eqn_Legend_genotype.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_phen.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_phen.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_phen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_phen.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_phen_rank.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_phen_rank.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_phen_rank.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_phen_rank.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_simultaneous.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_simultaneous.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_simultaneous.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_simultaneous.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_subsequent.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_subsequent.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_subsequent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_subsequent.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_terminal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_terminal.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_hist_terminal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_hist_terminal.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_manhattan_simultaneous.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_simultaneous.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_manhattan_simultaneous.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_simultaneous.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_manhattan_subsequent.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_subsequent.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_manhattan_subsequent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_subsequent.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_manhattan_terminal.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_terminal.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_manhattan_terminal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_manhattan_terminal.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_tree.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_tree.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_tree.png


--------------------------------------------------------------------------------
/vignettes/figs/plot_tree_parsimony.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_tree_parsimony.pdf


--------------------------------------------------------------------------------
/vignettes/figs/plot_tree_parsimony.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/plot_tree_parsimony.png


--------------------------------------------------------------------------------
/vignettes/figs/tree_phen_eg.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/tree_phen_eg.pdf


--------------------------------------------------------------------------------
/vignettes/figs/tree_phen_eg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/figs/tree_phen_eg.png


--------------------------------------------------------------------------------
/vignettes/old/ace.tree.cont.IC.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/ace.tree.cont.IC.pdf


--------------------------------------------------------------------------------
/vignettes/old/ace.tree.cont.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/ace.tree.cont.pdf


--------------------------------------------------------------------------------
/vignettes/old/ace_example_phen_R_0.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/ace_example_phen_R_0.Rdata


--------------------------------------------------------------------------------
/vignettes/old/figsunnamed-chunk-12-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-12-1.pdf


--------------------------------------------------------------------------------
/vignettes/old/figsunnamed-chunk-13-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-13-1.pdf


--------------------------------------------------------------------------------
/vignettes/old/figsunnamed-chunk-14-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-14-1.pdf


--------------------------------------------------------------------------------
/vignettes/old/figsunnamed-chunk-15-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-15-1.pdf


--------------------------------------------------------------------------------
/vignettes/old/figsunnamed-chunk-16-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-16-1.pdf


--------------------------------------------------------------------------------
/vignettes/old/figsunnamed-chunk-17-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-17-1.pdf


--------------------------------------------------------------------------------
/vignettes/old/figsunnamed-chunk-7-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/figsunnamed-chunk-7-1.pdf


--------------------------------------------------------------------------------
/vignettes/old/phen_cont_skewed.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/phen_cont_skewed.Rdata


--------------------------------------------------------------------------------
/vignettes/old/phen_cont_skewed_rank.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/phen_cont_skewed_rank.Rdata


--------------------------------------------------------------------------------
/vignettes/old/treeWAS Vignette.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS Vignette.pdf


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_example.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #############
  4 | ## EXAMPLE ##
  5 | #############
  6 | 
  7 | #######################
  8 | ## Clear environment ##
  9 | #######################
 10 | ## NOTE TO USER: This step will delete all variables from your environment.
 11 | ## You may want to save unsaved variables or skip this step.
 12 | rm(list=ls())
 13 | 
 14 | ##############################
 15 | ## Load sample distribution ##
 16 | ##############################
 17 | ## (currently using the ClonalFrame Saureus output, so we can see what happens
 18 | ## when we simulate data based on this distribution AND then use it to inform
 19 | ## treeWAS (as compared to treeWAS's performance with dist=NULL))
 20 | ## NOTE(review): this needs a dataset literally named `dist`; confirm it is
 21 | ## available, or substitute the intended dist_* dataset from the package data.
 22 | data(dist)
 23 | 
 24 | ################################
 25 | ## Simulate a coalescent tree ##
 26 | ################################
 27 | tree <- coalescent.tree.sim(n.ind = 100, seed = 1)
 28 | 
 29 | #######################################################
 30 | ## Simulate a phenotype for individuals in this tree ##
 31 | #######################################################
 32 | ## get list of phenotype simulation output
 33 | phen.output <- phen.sim(tree, n.subs = 15)
 34 | 
 35 | ## get phenotype for terminal nodes only
 36 | phen <- phen.output$phen
 37 | 
 38 | ## get phenotype for all nodes,
 39 | ## terminal and internal
 40 | phen.nodes <- phen.output$phen.nodes
 41 | 
 42 | ## get the indices of phen.subs (ie. branches)
 43 | phen.loci <- phen.output$phen.loci
 44 | 
 45 | #################################
 46 | ## Plot Tree showing Phenotype ##
 47 | #################################
 48 | phen.plot.colours <- plot.phen(tree = tree,
 49 |                                phen.nodes = phen.nodes,
 50 |                                plot = TRUE)
 51 | 
 52 | ###################################################################
 53 | ## Simulate genetic data (SNPs) that fit this tree and phenotype ##
 54 | ###################################################################
 55 | snps.output <- snp.sim(n.snps = 10000, n.subs=dist,
 56 |                        n.snps.assoc = 10, assoc.prob = 90,
 57 |                        tree = tree,
 58 |                        phen.loci = phen.loci,
 59 |                        heatmap = FALSE, reconstruct = FALSE,
 60 |                        dist.dna.model="JC69",
 61 |                        seed = 1)
 62 | snps <- snps.output$snps
 63 | snps.assoc <- snps.output$snps.assoc  ## compared against snps.indices in the EVALUATE steps
 64 | snps.names <- colnames(snps)
 65 | snps.indices <- seq_len(ncol(snps))  ## 1..ncol(snps); empty (not c(1, 0)) if snps has no columns
 66 | 
 67 | ################################################################################
 68 | ## Note that all previous steps can be performed with this combined function: ##
 69 | ################################################################################
 70 | # sim.output <- coalescent.sim(n.ind=100,
 71 | #                       n.snps=10000, n.subs=1,
 72 | #                       n.snps.assoc=10, assoc.prob=90,
 73 | #                       n.phen.subs=15, phen=NULL,
 74 | #                       plot=TRUE,
 75 | #                       heatmap=FALSE, reconstruct=FALSE,
 76 | #                       seed=1)
 77 | # snps <- sim.output$snps
 78 | # tree <- sim.output$tree
 79 | # phen <- sim.output$phen
 80 | # snps.assoc <- sim.output$snps.assoc
 81 | 
 82 | 
 83 | #################
 84 | ## Run treeWAS ##
 85 | #################
 86 | 
 87 | ## First, we'll try treeWAS with n.subs = 1
 88 | ## (NOTE(review): the original note says this uses the default Poisson with
 89 | ## parameter 1 for the number of substitutions per site -- confirm in treeWAS())
 90 | 
 91 | treeWAS.output <- treeWAS(snps, phen, n.subs = 1,
 92 |                           tree = tree,
 93 |                           dist.dna.model = NULL, plot.tree = FALSE,
 94 |                           test = "score",
 95 |                           p.value = 0.001, p.value.correct = "bonf", p.value.by = "count",
 96 |                           sim.n.snps = 10000, n.reps = 1,
 97 |                           plot.null.dist = TRUE, plot.dist = FALSE)
 98 | 
 99 | str(treeWAS.output)
100 | 
101 | # out <- treeWAS.output
102 | # corr.dat <- out$corr.dat
103 | # corr.sim <- out$corr.sim
104 | 
105 | 
106 | ##############
107 | ## EVALUATE ##
108 | ##############
109 | ## Split loci into called-positive / called-negative for run 1. A logical mask is
110 | ## used rather than x[-which(cond)]: when nothing matches, -which() is integer(0)
111 | ## and x[integer(0)] selects nothing, which would silently empty the complement.
112 | test.positive <- treeWAS.output$sig.snps$SNP.locus
113 | test.negative <- snps.indices[!(snps.indices %in% test.positive)]
114 | ## Truly non-associated loci, as indices so they compare like-for-like with test.negative
115 | snps.not <- snps.indices[!(snps.indices %in% snps.assoc)]
116 | true.positive <- test.positive[test.positive %in% snps.assoc]
117 | TP <- length(true.positive)
118 | true.negative <- test.negative[test.negative %in% snps.not]
119 | TN <- length(true.negative)
120 | false.positive <- test.positive[test.positive %in% snps.not]
121 | FP <- length(false.positive)
122 | false.negative <- test.negative[test.negative %in% snps.assoc]
123 | FN <- length(false.negative)
124 | 
125 | 
126 | #################
127 | ## sensitivity ##
128 | #################
129 | ## Share of the truly ASSOCIATED SNPs that the test flagged
130 | ## ~ Pr(Positive Test | SNP ASSOCIATED)
131 | ## --> evaluates to NaN (0/0) when TP + FN is 0
132 | sensitivity <- TP / (TP + FN)
133 | sensitivity
134 | #################
135 | ## specificity ##
136 | #################
137 | ## Share of the truly NOT associated SNPs that the test left unflagged
138 | ## ~ Pr(Negative Test | SNP NOT associated)
139 | specificity <- TN / (TN + FP) ## i.e. 1 - FPR
140 | specificity
141 | #########
142 | ## PPV ##
143 | #########
144 | ## Share of the POSITIVE calls that landed on truly ASSOCIATED SNPs
145 | ## ~ Pr(SNP ASSOCIATED | Positive Test)
146 | ## --> evaluates to NaN (0/0) when no positive calls were made
147 | PPV <- TP / (TP + FP) ## i.e. 1 - FDR
148 | PPV
149 | 
150 | 
151 | #################    #################    #################    #################
152 | 
153 | ## COMPARE TO: ##
154 | 
155 | #################
156 | ## Run treeWAS ##
157 | #################
158 | 
159 | ## Second, we can try treeWAS with n.subs = dist
160 | ## (where dist is the object loaded by data(dist) at the top of this script)
161 | ## (so it will use the same distribution that was passed to snp.sim above to
162 | ## identify the number of substitutions per site to simulate)
163 | 
164 | treeWAS.output2 <- treeWAS(snps, phen, n.subs = dist,
165 |                            tree = tree,
166 |                            dist.dna.model = NULL, plot.tree = FALSE,
167 |                            test = "score",
168 |                            p.value = 0.001, p.value.correct = "bonf", p.value.by = "count",
169 |                            sim.n.snps = 10000, n.reps = 1,
170 |                            plot.null.dist = TRUE, plot.dist = FALSE)
171 | 
172 | str(treeWAS.output2)
173 | 
174 | ##############
175 | ## EVALUATE ##
176 | ##############
177 | ## Split loci into called-positive / called-negative for run 2. A logical mask is
178 | ## used rather than x[-which(cond)]: when nothing matches, -which() is integer(0)
179 | ## and x[integer(0)] selects nothing, which would silently empty the complement.
180 | test.positive <- treeWAS.output2$sig.snps$SNP.locus
181 | test.negative <- snps.indices[!(snps.indices %in% test.positive)]
182 | ## Truly non-associated loci, as indices so they compare like-for-like with test.negative
183 | snps.not <- snps.indices[!(snps.indices %in% snps.assoc)]
184 | true.positive <- test.positive[test.positive %in% snps.assoc]
185 | TP <- length(true.positive)
186 | true.negative <- test.negative[test.negative %in% snps.not]
187 | TN <- length(true.negative)
188 | false.positive <- test.positive[test.positive %in% snps.not]
189 | FP <- length(false.positive)
190 | false.negative <- test.negative[test.negative %in% snps.assoc]
191 | FN <- length(false.negative)
192 | 
193 | #################
194 | ## sensitivity ##
195 | #################
196 | ## Share of the truly ASSOCIATED SNPs that the test flagged
197 | ## ~ Pr(Positive Test | SNP ASSOCIATED)
198 | ## --> evaluates to NaN (0/0) when TP + FN is 0
199 | sensitivity <- TP / (TP + FN)
200 | sensitivity
201 | #################
202 | ## specificity ##
203 | #################
204 | ## Share of the truly NOT associated SNPs that the test left unflagged
205 | ## ~ Pr(Negative Test | SNP NOT associated)
206 | specificity <- TN / (TN + FP) ## i.e. 1 - FPR
207 | specificity
208 | #########
209 | ## PPV ##
210 | #########
211 | ## Share of the POSITIVE calls that landed on truly ASSOCIATED SNPs
212 | ## ~ Pr(SNP ASSOCIATED | Positive Test)
213 | ## --> evaluates to NaN (0/0) when no positive calls were made
214 | PPV <- TP / (TP + FP) ## i.e. 1 - FDR
215 | PPV
216 | 
217 | #################    #################    #################    #################
218 | 
219 | ## COMPARE TO: ##
220 | 
221 | #################
222 | ## Run treeWAS ##
223 | #################
224 | 
225 | ## Third, we can try treeWAS with n.subs = NULL
226 | ## So we will use the Fitch parsimony functions from R pkg phangorn
227 | ## (reconfigured for our purposes in treeWAS function get.fitch.n.mts)
228 | ## to reconstruct the distribution of n.subs-per-site from the snps data and tree.
229 | 
230 | treeWAS.output3 <- treeWAS(snps, phen, n.subs = NULL,
231 |                            tree = tree,
232 |                            dist.dna.model = NULL, plot.tree = FALSE,
233 |                            test = "score",
234 |                            p.value = 0.001, p.value.correct = "bonf", p.value.by = "count",
235 |                            sim.n.snps = 10000, n.reps = 1,
236 |                            plot.null.dist = TRUE, plot.dist = FALSE)
237 | 
238 | str(treeWAS.output3)
239 | 
240 | ##############
241 | ## EVALUATE ##
242 | ##############
243 | ## Split loci into called-positive / called-negative for run 3. A logical mask is
244 | ## used rather than x[-which(cond)]: when nothing matches, -which() is integer(0)
245 | ## and x[integer(0)] selects nothing, which would silently empty the complement.
246 | test.positive <- treeWAS.output3$sig.snps$SNP.locus
247 | test.negative <- snps.indices[!(snps.indices %in% test.positive)]
248 | ## Truly non-associated loci, as indices so they compare like-for-like with test.negative
249 | snps.not <- snps.indices[!(snps.indices %in% snps.assoc)]
250 | true.positive <- test.positive[test.positive %in% snps.assoc]
251 | TP <- length(true.positive)
252 | true.negative <- test.negative[test.negative %in% snps.not]
253 | TN <- length(true.negative)
254 | false.positive <- test.positive[test.positive %in% snps.not]
255 | FP <- length(false.positive)
256 | false.negative <- test.negative[test.negative %in% snps.assoc]
257 | FN <- length(false.negative)
258 | 
259 | #################
260 | ## sensitivity ##
261 | #################
262 | ## Share of the truly ASSOCIATED SNPs that the test flagged
263 | ## ~ Pr(Positive Test | SNP ASSOCIATED)
264 | ## --> evaluates to NaN (0/0) when TP + FN is 0
265 | sensitivity <- TP / (TP + FN)
266 | sensitivity
267 | #################
268 | ## specificity ##
269 | #################
270 | ## Share of the truly NOT associated SNPs that the test left unflagged
271 | ## ~ Pr(Negative Test | SNP NOT associated)
272 | specificity <- TN / (TN + FP) ## i.e. 1 - FPR
273 | specificity
274 | #########
275 | ## PPV ##
276 | #########
277 | ## Share of the POSITIVE calls that landed on truly ASSOCIATED SNPs
278 | ## ~ Pr(SNP ASSOCIATED | Positive Test)
279 | ## --> evaluates to NaN (0/0) when no positive calls were made
280 | PPV <- TP / (TP + FP) ## i.e. 1 - FDR
281 | PPV
282 | 


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-23-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-23-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caitiecollins/treeWAS/895dfd0c986445336043ab3f626f4e0ed7f153a8/vignettes/old/treeWAS_vignette_files/figure-markdown_strict/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------