├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── R ├── AllClass.R ├── AllGeneric.R ├── RcppExports.R ├── associationTest.R ├── qtcatPackage.R ├── snpCluster.R ├── snpData.R └── snpImpute.R ├── README.md ├── appveyor.yml ├── inst └── extdata │ ├── phenodata.csv │ └── snpdata.csv ├── man ├── alleleFreq-snpMatrix-method.Rd ├── alleleFreq.Rd ├── as.matrix-snpMatrix-method.Rd ├── as.snpMatrix.Rd ├── clarans.Rd ├── cutClust.Rd ├── distCor.Rd ├── hetFreq-snpMatrix-method.Rd ├── hetFreq.Rd ├── identicals.Rd ├── imputeMedoids.Rd ├── imputeSnp.Rd ├── imputeSnpIter.Rd ├── imputeSnpMatrix.Rd ├── lmQtc.Rd ├── medoQtc.Rd ├── naFreq-snpMatrix-method.Rd ├── naFreq.Rd ├── plotQtc.Rd ├── plotSelFreq.Rd ├── qtcat-package.Rd ├── qtcatClust.Rd ├── qtcatGeno.Rd ├── qtcatHit.Rd ├── qtcatPheno.Rd ├── qtcatQtc.Rd ├── read.snpData.Rd ├── rename.leafs.Rd ├── snpInfo-qtcatHit-method.Rd ├── snpInfo-snpMatrix-method.Rd ├── snpInfo.Rd ├── snpMatrix-class.Rd └── sub-snpMatrix-ANY-ANY-missing-method.Rd └── src ├── RcppExports.cpp ├── ReadData.cpp ├── snpCluster.cpp └── snpData.cpp /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | tests/* 4 | ^packrat/ 5 | .Rprofile 6 | ^\.travis\.yml$ 7 | ^appveyor\.yml$ 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .Rprofile 4 | .RData 5 | qtcat.Rproj 6 | src/*.o 7 | src/*.so 8 | src/*.dll 9 | inst/doc 10 | packrat/lib* 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | 4 | matrix: 5 | include: 6 | - os: linux 7 | - os: osx 8 | 9 | warnings_are_errors: true 10 | 11 | # r_github_packages: 12 | # - jrklasen/hit 13 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: qtcat 2 | Title: Quantitative Trait Cluster Association Test 3 | Description: 4 | All SNPs are jointly associated to the phenotype and at the same time correlation among 5 | them is considered. Thus, correction for population structure becomes unnecessary, 6 | which in many cases results in a power advantages compared to other methods. 7 | Authors@R: person("Jonas", "Klasen", role = c("aut", "cre"), email = "qtcat@gmx.de") 8 | Version: 0.4.3 9 | URL: http://github.com/QTCAT/qtcat 10 | BugReports: https://github.com/QTCAT/qtcat/issues 11 | Depends: 12 | R (>= 3.2.0) 13 | Imports: 14 | methods, 15 | stats, 16 | parallel, 17 | graphics, 18 | utils, 19 | Rcpp (>= 0.11.0), 20 | fastcluster (>= 1.1.16), 21 | hit (>= 0.4.0) 22 | Suggests: 23 | knitr, 24 | rmarkdown 25 | LinkingTo: Rcpp 26 | License: GPL (>= 2) 27 | RoxygenNote: 6.0.1 28 | VignetteBuilder: knitr 29 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(alleleFreq) 4 | export(as.snpMatrix) 5 | export(clarans) 6 | export(cutClust) 7 | export(distCor) 8 | export(hetFreq) 9 | export(identicals) 10 | export(imputeSnpMatrix) 11 | export(lmQtc) 12 | export(medoQtc) 13 | export(naFreq) 14 | export(plotQtc) 15 | export(plotSelFreq) 16 | export(qtcatClust) 17 | export(qtcatGeno) 18 | export(qtcatHit) 19 | export(qtcatPheno) 20 | export(qtcatQtc) 21 | export(read.snpData) 22 | export(snpInfo) 23 | exportClasses(snpMatrix) 24 | exportMethods("[") 25 | exportMethods(alleleFreq) 26 | exportMethods(as.matrix) 27 | exportMethods(hetFreq) 28 | exportMethods(naFreq) 29 | exportMethods(snpInfo) 30 | importFrom(Rcpp,evalCpp) 31 | importFrom(fastcluster,hclust) 32 | importFrom(graphics,axis) 33 | importFrom(graphics,mtext) 34 | importFrom(graphics,plot) 35 | importFrom(hit,as.hierarchy) 36 | importFrom(hit,hit) 37 | importFrom(methods,is) 38 | importFrom(methods,new) 39 | importFrom(methods,setClass) 40 | importFrom(methods,setGeneric) 41 | importFrom(methods,setMethod) 42 | importFrom(methods,setOldClass) 43 | importFrom(methods,signature) 44 | importFrom(parallel,mclapply) 45 | importFrom(stats,as.dendrogram) 46 | importFrom(stats,cor) 47 | importFrom(stats,dendrapply) 48 | importFrom(stats,is.leaf) 49 | importFrom(stats,lm) 50 | importFrom(stats,na.omit) 51 | importFrom(stats,optimise) 52 | importFrom(stats,reorder) 53 | importFrom(utils,installed.packages) 54 | useDynLib(qtcat) 55 | -------------------------------------------------------------------------------- /R/AllClass.R: -------------------------------------------------------------------------------- 1 | #' @title A S4 class to represent a SNP-matrix 2 | #' 3 | #' @description A S4 class to represent a SNP matrix. Storing SNP information, by using a 4 | #' byte-level (raw) storage scheme, jointly with genomic position and allele information. 5 | #' 6 | #' @slot snpData a matrix of SNPs stored in type 'raw'. 00 is NA, 01 homozygote AA, 02 7 | #' heterozygote AB, and 03 homozygote BB. 8 | #' @slot snpInfo data.frame with four columns. The first col. contains the chromosomes, 9 | #' the second col. the positions, the third col. the first allele and the fourth the second 10 | #' allele. 11 | #' @slot dim an integer vector with exactly two non-negative values. 12 | #' @slot dimnames a list of length two; each component containing NULL or a character vector 13 | #' length equal the corresponding dim element. 14 | #' 15 | #' @importFrom methods setClass 16 | #' @export 17 | setClass("snpMatrix", 18 | slots = c(snpData = "matrix", 19 | snpInfo = "data.frame", 20 | dim = "integer", 21 | dimnames = "list")) 22 | -------------------------------------------------------------------------------- /R/AllGeneric.R: -------------------------------------------------------------------------------- 1 | #' @title Extract genomic position and allele information. 2 | #' 3 | #' @description Extract genomic position and allele information from object. 4 | #' 5 | #' @param object an object, for which a corresponding method exists. 6 | #' 7 | #' @importFrom methods setGeneric 8 | #' @export 9 | setGeneric("snpInfo", function(object) standardGeneric("snpInfo")) 10 | 11 | 12 | #' @title Allele Frequency. 13 | #' 14 | #' @description Frequency of alleles in data set. 15 | #' 16 | #' @param x an object, for which a corresponding method exists. 17 | #' @param maf logical, if true minor allele frequency (default), other ways allele 18 | #' frequency of 'allele.1'. 19 | #' 20 | #' @importFrom methods setGeneric 21 | #' @export 22 | setGeneric("alleleFreq", function(x, maf = TRUE) standardGeneric("alleleFreq")) 23 | 24 | 25 | #' @title Heterozygosity Frequency. 26 | #' 27 | #' @description Frequency of heterozygosity in data set. 28 | #' 29 | #' @param x an object, for which a corresponding method exists. 30 | #' @param dim dimension over which heterozygosity is estimated. 1 (default) is for rows 31 | #' (individuals), 2 is for columns (SNPs). 32 | #' 33 | #' @importFrom methods setGeneric 34 | #' @export 35 | setGeneric("hetFreq", function(x, dim = 1L) standardGeneric("hetFreq")) 36 | 37 | 38 | #' @title Missing Data Frequency. 39 | #' 40 | #' @description Frequency of missing Data in data set. 41 | #' 42 | #' @param x an object, for which a corresponding method exists. 43 | #' @param dim dimension over which heterozygosity is estimated. 1 (default) is for rows 44 | #' (individuals), 2 is for columns (SNPs). 45 | #' 46 | #' @importFrom methods setGeneric 47 | #' @export 48 | setGeneric("naFreq", function(x, dim = 1L) standardGeneric("naFreq")) 49 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | read_snpData <- function(file, sep, quote, rowNames, na_str, nrows) { 5 | .Call('_qtcat_read_snpData', PACKAGE = 'qtcat', file, sep, quote, rowNames, na_str, nrows) 6 | } 7 | 8 | corDist <- function(x, y) { 9 | .Call('_qtcat_corDist', PACKAGE = 'qtcat', x, y) 10 | } 11 | 12 | corDists <- function(x) { 13 | .Call('_qtcat_corDists', PACKAGE = 'qtcat', x) 14 | } 15 | 16 | corPreIdenticals <- function(x, step) { 17 | .Call('_qtcat_corPreIdenticals', PACKAGE = 'qtcat', x, step) 18 | } 19 | 20 | corIdenticals <- function(x, clustIdx) { 21 | .Call('_qtcat_corIdenticals', PACKAGE = 'qtcat', x, clustIdx) 22 | } 23 | 24 | joinCorIdenticals <- function(n, preclust, ClustMedo) { 25 | .Call('_qtcat_joinCorIdenticals', PACKAGE = 'qtcat', n, preclust, ClustMedo) 26 | } 27 | 28 | corClarans <- function(x, k, maxNeigbours) { 29 | .Call('_qtcat_corClarans', PACKAGE = 'qtcat', x, k, maxNeigbours) 30 | } 31 | 32 | corMedoids <- function(x, clusters) { 33 | .Call('_qtcat_corMedoids', PACKAGE = 'qtcat', x, clusters) 34 | } 35 | 36 | design <- function(x) { 37 | .Call('_qtcat_design', PACKAGE = 'qtcat', x) 38 | } 39 | 40 | afreq <- function(x, maf) { 41 | .Call('_qtcat_afreq', PACKAGE = 'qtcat', x, maf) 42 | } 43 | 44 | hetfreq <- function(x, dim) { 45 | .Call('_qtcat_hetfreq', PACKAGE = 'qtcat', x, dim) 46 | } 47 | 48 | nafreq <- function(x, dim) { 49 | .Call('_qtcat_nafreq', PACKAGE = 'qtcat', x, dim) 50 | } 51 | 52 | -------------------------------------------------------------------------------- /R/associationTest.R: -------------------------------------------------------------------------------- 1 | #' @title A genotype object constructor 2 | #' 3 | #' @description Constructs a S3-object containing a SNP design matrix and a hierarchy. If 4 | #' a SNP in the input object contains missing data, the clustering is used to impute 5 | #' information from highly correlated neighbor SNPs. The genotype object is needed for 6 | #' \code{\link{qtcatHit}} as input. 7 | #' 8 | #' @param snp an object of S4 class \linkS4class{snpMatrix}. 9 | #' @param snpClust an object of class \code{\link{qtcatClust}}. 10 | #' @param absCor a vector of absolute value of correlations considered in the hierarchy. 11 | #' @param min.absCor a minimum absolute value of correlation which is considered. A value 12 | #' in the range from 0 to 1. 13 | #' @param mc.cores a number of cores for parallelising. The maximum is \code{'B'}. For 14 | #' details see \code{\link[parallel]{mclapply}}. 15 | #' 16 | #' @examples 17 | #' # file containing example data for SNP data 18 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 19 | #' snp <- read.snpData(gfile, sep = ",") 20 | #' clust <- qtcatClust(snp) 21 | #' 22 | #' # Construct geotype object 23 | #' geno <- qtcatGeno(snp, clust) 24 | #' 25 | #' @importFrom hit as.hierarchy 26 | #' @importFrom methods is 27 | #' @importFrom stats reorder 28 | #' @export 29 | qtcatGeno <- function(snp, snpClust, absCor, min.absCor = 0.5, mc.cores = 1) { 30 | stopifnot(is(snp, "snpMatrix")) 31 | stopifnot(is(snpClust, "qtcatClust")) 32 | if (!setequal(names(snpClust$clusters), colnames(snp))) 33 | stop("Names of 'snp' and 'snpClust' differ") 34 | if (missing(absCor)) 35 | hier <- as.hierarchy(snpClust$dendrogram, 1 - min.absCor, names = colnames(snp)) 36 | else 37 | hier <- as.hierarchy(snpClust$dendrogram, height = 1 - absCor, names = colnames(snp)) 38 | if (any(naFreq(snp) > 0)) 39 | snp <- imputeMedoids(snp, snpClust, hier, .25, mc.cores) 40 | if (is.null(names <- snpClust$medoids)) 41 | names <- names(snpClust) 42 | snpnames <- colnames(snp)[colnames(snp) %in% names] 43 | desMat <- as.matrix(snp[, snpnames]) 44 | hier <- reorder(hier, snpnames) 45 | out <- list(x = desMat, 46 | hierarchy = hier, 47 | clusters = snpClust$clusters, 48 | medoids = snpClust$medoids, 49 | snpInfo = snpInfo(snp)) 50 | class(out) <- "qtcatGeno" 51 | out 52 | } 53 | 54 | #' @title A phenotype object constructor 55 | #' 56 | #' @description Constructs an S3-object containing phenotype and if additional covariats 57 | #' exist a design matrix of those. The phenotype object is needed as input for 58 | #' \code{\link{qtcatHit}}. 59 | #' 60 | #' @param names a vector of individual names of length 'n'. 61 | #' @param pheno a vector of length 'n' or a matrix size 'n x 2' in case of binomial family. 62 | #' This contains the phenotypic observations. 63 | #' @param family a character string specifying the family of the phenotype distribution. 64 | #' Either "gaussian" (default) or "binomial". 65 | #' @param covariates a matrix typically generated by a call of 66 | #' \code{\link[stats]{model.matrix}}. It contain additional variables influencing the 67 | #' phenotype e.g. environmental and experimental covariates. 68 | #' 69 | #' @examples 70 | #' # file containing example data for a phenotype. 71 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 72 | #' pdat <- read.csv(pfile, header = TRUE) 73 | #' 74 | #' # Construct phenotype object 75 | #' pheno <- qtcatPheno(names = pdat[, 1], 76 | #' pheno = pdat[, 2], 77 | #' covariates = model.matrix(~ pdat[, 3])) 78 | #' 79 | #' @export 80 | qtcatPheno <- function(names, pheno, family = "gaussian", covariates = NULL) { 81 | match.arg(family, c("gaussian", "binomial")) 82 | nn <- length(names) 83 | if (any(is.na(pheno))) 84 | stop("Missing values in 'pheno' are not allowed") 85 | if (!is.null(covariates)) { 86 | if (is.matrix(covariates)) { 87 | nc <- nrow(covariates) 88 | if (all(covariates[, 1] == 1)) 89 | covariates <- covariates[, -1, drop = FALSE] 90 | } else { 91 | stop("'covariates' has to be a 'matrix'") 92 | } 93 | } else { 94 | covariates <- matrix(nrow = nn, ncol = 0L) 95 | nc <- nn 96 | } 97 | if (is.vector(pheno)) { 98 | np <- length(pheno) 99 | } else if (is.matrix(pheno) && family == "binomial") { 100 | if (ncol(pheno) > 2) 101 | stop("'phone' has more than two columns which is not allowed") 102 | np <- nrow(pheno) 103 | } else { 104 | stop("'pheno' has to be a 'vector' or in in case of binomial family a 'matrix'") 105 | } 106 | if (nn == nc && nn == np) 107 | out <- list(names = as.character(names), 108 | pheno = pheno, 109 | covariates = covariates, 110 | family = family) 111 | class(out) <- "qtcatPheno" 112 | out 113 | } 114 | 115 | 116 | #' @title Fitting Hierarchical Inference Testing 117 | #' 118 | #' @description Hierarchical inference testing for phenotype-SNP association. 119 | #' 120 | #' @param pheno an object of class \code{\link{qtcatPheno}}. 121 | #' @param geno an object of class \code{\link{qtcatGeno}}. 122 | #' @param B a integer indicating the number of sample-splits. 123 | #' @param p.samp1 a value specifying the fraction of data used for the LASSO sample-split. 124 | #' The ANOVA sample-split is \code{1 - p.samp1}. 125 | #' @param nfolds Number of folds (default is 5). See \code{\link[glmnet]{cv.glmnet}} for 126 | #' more details. 127 | #' @param overall.lambda Logical, if true, lambda is estimated once, if false (default), 128 | #' lambda is estimated for each sample split. 129 | #' @param lambda.opt a criterion for optimum selection of cross validated lasso. Either 130 | #' "lambda.1se" (default) or "lambda.min". See 131 | #' \code{\link[glmnet]{cv.glmnet}} for more details. 132 | #' @param alpha a single value in the range of 0 to 1 for the elastic net mixing parameter. 133 | #' @param gamma a vector of gamma-values used in significance estimation. 134 | #' @param max.p.esti a maximum for computed p-values. All p-values above this value are set 135 | #' to one. Small \code{max.p.esti} values reduce computing time. 136 | #' @param seed a RNG seed, see \code{\link{set.seed}}. 137 | #' @param mc.cores a number of cores for parallelising. The maximum is 138 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}. 139 | #' @param trace logical, if \code{TRUE} it prints the current status of the program. 140 | #' @param ... additional arguments for \code{\link[glmnet]{cv.glmnet}}. 141 | #' 142 | #' @examples 143 | #' # If you want to run the examples, use: 144 | #' # example(qtcatHit, run.dontrun = TRUE) 145 | #' \dontrun{ 146 | #' # files containing example data for SNP data and the phenotype 147 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 148 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 149 | #' pdat <- read.csv(pfile, header = TRUE) 150 | #' snp <- read.snpData(gfile, sep = ",") 151 | #' clust <- qtcatClust(snp) 152 | #' geno <- qtcatGeno(snp, clust) 153 | #' pheno <- qtcatPheno(names = pdat[, 1], 154 | #' pheno = pdat[, 2], 155 | #' covariates = model.matrix(~ pdat[, 3])) 156 | #' 157 | #' # fitting HIT 158 | #' fitted <- qtcatHit(pheno, geno) 159 | #' } 160 | #' 161 | #' @importFrom methods is 162 | #' @export 163 | qtcatHit <- function(pheno, geno, B = 50, p.samp1 = 0.35, 164 | nfolds = 5, overall.lambda = FALSE, lambda.opt = "lambda.1se", 165 | alpha = 1, gamma = seq(0.05, 0.99, by = 0.01), 166 | max.p.esti = 1, seed = 12321, mc.cores = 1, trace = FALSE, ...) { 167 | set.seed(seed) 168 | on.exit(set.seed(NULL)) 169 | stopifnot(is(pheno, "qtcatPheno")) 170 | stopifnot(is(geno, "qtcatGeno")) 171 | id <- intersect(pheno$names, rownames(geno$x)) 172 | if (!length(id)) 173 | stop("The ID intersect of 'pheno' and 'geno' is emty") 174 | if (length(id.uniqueGeno <- setdiff(rownames(geno$x), id))) 175 | cat("The following individuals are part of 'geno' but not of 'pheno':\n", 176 | paste(id.uniqueGeno, collapse = " "), "\n") 177 | if (length(id.uniquePheno <- setdiff(pheno$names, id))) 178 | cat("The following individuals are part of 'pheno' but not of 'geno':\n", 179 | paste(id.uniquePheno, collapse = " "), "\n") 180 | phenoInx <- which(pheno$names %in% id) 181 | genoInx <- match(pheno$names[phenoInx], rownames(geno$x)) 182 | if (ncol(pheno$covariates) == 0L) 183 | x <- geno$x[genoInx, ] 184 | else 185 | x <- cbind(geno$x[genoInx, ], pheno$covariates[phenoInx, ]) 186 | y <- pheno$pheno[phenoInx] 187 | fitHit <- hit(x, y, geno$hierarchy, pheno$family, 188 | B, p.samp1, nfolds, overall.lambda, lambda.opt, alpha, 189 | gamma, max.p.esti, mc.cores, 190 | trace, standardize = FALSE) 191 | out <- c(fitHit, geno[3L:5L]) 192 | class(out) <- c("qtcatHit", "hit") 193 | out 194 | } 195 | 196 | 197 | #' @title Summarize results of Hierarchical Inference Test 198 | #' 199 | #' @description Summarizing the QTCs (significant cluster of SNPs) and their position at 200 | #' the genome. 201 | #' 202 | #' @param object an object of class \code{\link{qtcatHit}}. 203 | #' @param alpha an alpha level for significance estimation. 204 | #' @param min.absCor a minimum absolute value of correlation to be considered. 205 | #' 206 | #' @examples 207 | #' # If you want to run the examples, use: 208 | #' # example(qtcatQtc, run.dontrun = TRUE) 209 | #' \dontrun{ 210 | #' # files containing example data for SNP data and the phenotype 211 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 212 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 213 | #' pdat <- read.csv(pfile, header = TRUE) 214 | #' snp <- read.snpData(gfile, sep = ",") 215 | #' clust <- qtcatClust(snp) 216 | #' geno <- qtcatGeno(snp, clust) 217 | #' pheno <- qtcatPheno(names = pdat[, 1], 218 | #' pheno = pdat[, 2], 219 | #' covariates = model.matrix(~ pdat[, 3])) 220 | #' fitted <- qtcatHit(pheno, geno) 221 | #' 222 | #' # Summarizing the QTCs (loci37, loci260, and loci367 are causal) 223 | #' qtcatQtc(fitted) 224 | #' } 225 | #' 226 | #' @importFrom hit hit 227 | #' @importFrom methods is 228 | #' @export 229 | qtcatQtc <- function(object, alpha = 0.05, min.absCor = 0.05) { 230 | stopifnot(is(object, "qtcatHit")) 231 | y <- summary(object, alpha, 1 - min.absCor) 232 | signames <- rownames(y) 233 | sigclust <- match(signames, object$medoids) 234 | sigClust <- matrix(0, length(object$clusters), 3L) 235 | sigClust[, 2L] <- 1 236 | for (i in seq_along(signames)) { 237 | inx <- which(object$clusters == sigclust[i]) 238 | sigClust[inx, ] <- matrix(unlist(y[signames[i], ]), 239 | length(inx), 3L, byrow = TRUE) 240 | } 241 | rownames(sigClust) <- names(object$clusters) 242 | sigClust[, 2L] <- 1 - sigClust[, 2L] 243 | colnames(sigClust) <- c(colnames(y)[1L], "absCor", colnames(y)[3L]) 244 | sigClust <- as.data.frame(sigClust[sigClust[, 1L] != 0, ,drop = FALSE]) 245 | out <- cbind(snpInfo(object)[rownames(sigClust), 1:2], sigClust) 246 | out 247 | } 248 | 249 | 250 | setOldClass("qtcatHit") 251 | #' @title Get position from qtcatHit object 252 | #' 253 | #' @description Genetic position info from an object of class qtcatHit. 254 | #' 255 | #' @param object an object of class \code{\link{qtcatHit}}. 256 | #' 257 | #' 258 | #' @importFrom methods setMethod setOldClass 259 | #' @export 260 | setMethod("snpInfo", "qtcatHit", 261 | function(object) { 262 | out <- object$snpInfo 263 | if (is.null(out)) { 264 | cat("No position information available") 265 | } 266 | out 267 | } 268 | ) 269 | 270 | 271 | #' @title Find medoids of QTCs 272 | #' @description Find a medoid of for each quantitative trait cluster (QTC). 273 | #' 274 | #' @param object an object of class \code{\link{qtcatHit}}. 275 | #' @param geno an object of class \code{\link{qtcatGeno}}. 276 | #' @param alpha an alpha level for significance estimation. 277 | #' @param min.absCor minimum absolute value of correlation considered. 278 | #' 279 | #' @examples 280 | #' # If you want to run the examples, use: 281 | #' # example(medoQtc, run.dontrun = TRUE) 282 | #' \dontrun{ 283 | #' # files containing example data for SNP data and the phenotype 284 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 285 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 286 | #' pdat <- read.csv(pfile, header = TRUE) 287 | #' snp <- read.snpData(gfile, sep = ",") 288 | #' clust <- qtcatClust(snp) 289 | #' geno <- qtcatGeno(snp, clust) 290 | #' pheno <- qtcatPheno(names = pdat[, 1], 291 | #' pheno = pdat[, 2], 292 | #' covariates = model.matrix(~ pdat[, 3])) 293 | #' fitted <- qtcatHit(pheno, geno) 294 | #' 295 | #' # QTC medoids 296 | #' medo <- medoQtc(fitted, geno) 297 | #' } 298 | #' 299 | #' @importFrom methods is 300 | #' @importFrom stats cor 301 | #' @export 302 | medoQtc <- function(object, geno, alpha = 0.05, min.absCor = 0.05) { 303 | stopifnot(is(object, "qtcatHit")) 304 | stopifnot(is(geno, "qtcatGeno")) 305 | sigClust <- summary(object, alpha, min.absCor) 306 | if (nrow(sigClust)) { 307 | clusters <- split(rownames(sigClust), sigClust$clusters) 308 | medoids <- sapply(clusters, function(names, geno) { 309 | if (length(names) > 1L) 310 | return(names[which.max(abs(cor(geno$x[, names])))]) 311 | else 312 | return(names) 313 | }, geno = geno) 314 | } else { 315 | medoids <- c() 316 | } 317 | medoids 318 | } 319 | 320 | 321 | #' @title Fitting a Linear Model to QTCs 322 | #' 323 | #' @description Linear model between phenotype and medoids of QTCs (significant SNP 324 | #' clusters). 325 | #' 326 | #' @param object an object of class \code{\link{qtcatHit}}. 327 | #' @param pheno an object of class \code{\link{qtcatPheno}}. 328 | #' @param geno an object of class \code{\link{qtcatGeno}}. 329 | #' @param alpha an alpha level for significance estimation. 330 | #' @param min.absCor minimum absolute value of correlation considered. 331 | #' 332 | #' @examples 333 | #' # If you want to run the examples, use: 334 | #' # example(lmQtc, run.dontrun = TRUE) 335 | #' \dontrun{ 336 | #' # files containing example data for SNP data and the phenotype 337 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 338 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 339 | #' pdat <- read.csv(pfile, header = TRUE) 340 | #' snp <- read.snpData(gfile, sep = ",") 341 | #' clust <- qtcatClust(snp) 342 | #' geno <- qtcatGeno(snp, clust) 343 | #' pheno <- qtcatPheno(names = pdat[, 1], 344 | #' pheno = pdat[, 2], 345 | #' covariates = model.matrix(~ pdat[, 3])) 346 | #' fitted <- qtcatHit(pheno, geno) 347 | #' 348 | #' # fitting a LM to the phenotype and QTC medoids 349 | #' lmfitted <- lmQtc(fitted, pheno, geno) 350 | #' } 351 | #' 352 | #' @importFrom methods is 353 | #' @importFrom stats lm 354 | #' @export 355 | lmQtc <- function(object, pheno, geno, alpha = 0.05, min.absCor = 0.05) { 356 | stopifnot(is(object, "qtcatHit")) 357 | stopifnot(is(pheno, "qtcatPheno")) 358 | stopifnot(is(geno, "qtcatGeno")) 359 | id <- intersect(pheno$names, rownames(geno$x)) 360 | phenoInx <- which(pheno$names %in% id) 361 | if (!length(id)) 362 | stop("The ID intersect of 'pheno' and 'geno' is emty") 363 | if (length(id.uniqueGeno <- setdiff(rownames(geno$x), id))) 364 | cat("The following individuals are part of 'geno' but not of 'pheno':\n", 365 | paste(id.uniqueGeno, collapse = " "), "\n") 366 | if (length(id.uniquePheno <- setdiff(pheno$names, id))) 367 | cat("The following individuals are part of 'pheno' but not of 'geno':\n", 368 | paste(id.uniquePheno, collapse = " "), "\n") 369 | medoids <- medoQtc(object, geno, alpha, min.absCor) 370 | if (length(medoids)) { 371 | xg <- geno$x[, colnames(geno$x) %in% medoids, drop = FALSE] 372 | genoInx <- match(pheno$names[phenoInx], rownames(xg)) 373 | rownames(xg) <- NULL 374 | if (ncol(pheno$covariates)) { 375 | dat <- data.frame(y = pheno$pheno[phenoInx], 376 | pheno$covariates[phenoInx, ], 377 | xg[genoInx, ]) 378 | } else { 379 | dat <- data.frame(y = pheno$pheno[phenoInx], xg[genoInx, ]) 380 | } 381 | } else if (ncol(pheno$covariates)) { 382 | dat <- data.frame(y = pheno$pheno[phenoInx], 383 | pheno$covariates[phenoInx, ]) 384 | } else { 385 | dat <- data.frame(y = pheno$pheno[phenoInx]) 386 | } 387 | out <- lm(y ~ ., data = dat) 388 | out 389 | } 390 | 391 | 392 | #' @title Plot resulting QTCs of the Hierarchical Inference Test 393 | #' 394 | #' @description Plot the QTCs (significant cluster of SNPs) at their 395 | #' position at the genome. 396 | #' 397 | #' @param x an object of class \code{\link{qtcatHit}}. 398 | #' @param alpha an alpha level for significance estimation. 399 | #' @param xlab a title for the x axis. 400 | #' @param ylab a title for the y axis. 401 | #' @param col.axis colors for axis line, tick marks, and title respectively. 402 | #' @param ... other graphical parameters may also be passed as arguments to this function. 403 | #' 404 | #' @examples 405 | #' # If you want to run the examples, use: 406 | #' # example(plotQtc, run.dontrun = TRUE) 407 | #' \dontrun{ 408 | #' # files containing example data for SNP data and the phenotype 409 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 410 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 411 | #' pdat <- read.csv(pfile, header = TRUE) 412 | #' snp <- read.snpData(gfile, sep = ",") 413 | #' clust <- qtcatClust(snp) 414 | #' geno <- qtcatGeno(snp, clust) 415 | #' pheno <- qtcatPheno(names = pdat[, 1], 416 | #' pheno = pdat[, 2], 417 | #' covariates = model.matrix(~ pdat[, 3])) 418 | #' fitted <- qtcatHit(pheno, geno) 419 | #' 420 | #' # Plot the QTCs (loci37, loci260, and loci367 are causal) 421 | #' plotQtc(fitted) 422 | #' } 423 | #' 424 | #' @importFrom graphics plot axis mtext 425 | #' @export 426 | plotQtc <- function(x, alpha = 0.05, xlab = "Chromosomes", 427 | ylab = expression(-log[10](italic(p))), col.axis = NULL, ...) { 428 | stopifnot(is(x, "qtcatHit")) 429 | # make positions linear with gaps between chr's 430 | pos <- snpInfo(x)[, 1:2] 431 | chrminmax <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2)) 432 | chrgap <- sum(chrminmax[2L, ]) * .01 433 | chrsize <- cumsum(c(0, chrminmax[2L, -ncol(chrminmax)])) - 434 | cumsum(chrminmax[1L, ]) + 435 | cumsum(c(0, rep(chrgap, ncol(chrminmax) - 1L))) 436 | chr <- sort(unique(pos[, 1L])) 437 | for (i in seq_along(chr)) { 438 | inx <- which(pos[, 1L] == chr[i]) 439 | pos[inx, 2L] <- pos[inx, 2L] + chrsize[i] 440 | } 441 | chrstartend <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2)) 442 | xlim <- c(chrstartend[1L, 1L] - chrgap, chrstartend[2L, ncol(chrstartend)] + chrgap) 443 | # get QTCs from result 444 | qtc <- qtcatQtc(x, alpha = alpha, min.absCor = .01) 445 | for (i in seq_along(chr)) { 446 | inx2 <- which(qtc[, 1L] == chr[i]) 447 | qtc[inx2, 2L] <- qtc[inx2, 2L] + chrsize[i] 448 | } 449 | qtc$pValues[qtc$pValues] <- qtc$pValues[qtc$pValues] + 1e-308 450 | qtc$log.pValues <- -log10(qtc$pValues) 451 | # plot 452 | plot(qtc$pos, qtc$log.pValues, xlim = xlim, ylim = c(0, max(9, qtc$log.pValues) + 1), 453 | axes = FALSE, xlab = "", ylab = "", ...) 454 | # x 455 | for (i in seq_along(chr)) 456 | axis(1, labels = FALSE, at = c(chrstartend[1, i], chrstartend[2, i]), col = col.axis) 457 | axis(1, at = colMeans(chrstartend), labels = chr, col = NA, col.axis = col.axis) 458 | mtext(xlab, 1, 2.5, col = col.axis) 459 | # y 460 | axis(2, col.axis = col.axis, col = col.axis) 461 | mtext(expression(-log[10](italic(p))), 2, 2.5, col = col.axis) 462 | } 463 | 464 | 465 | #' @title Plot markers selection frequencies of the Hierarchical Inference Test 466 | #' 467 | #' @description Plot markers selection frequencies at their 468 | #' position at the genome. 469 | #' 470 | #' @param x an object of class \code{\link{qtcatHit}}. 471 | #' @param xlab a title for the x axis. 472 | #' @param ylab a title for the y axis. 473 | #' @param col.axis colors for axis line, tick marks, and title respectively. 474 | #' @param ... other graphical parameters may also be passed as arguments to this function. 475 | #' 476 | #' @examples 477 | #' # If you want to run the examples, use: 478 | #' # example(plotSelFreq, run.dontrun = TRUE) 479 | #' \dontrun{ 480 | #' # files containing example data for SNP data and the phenotype 481 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 482 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 483 | #' pdat <- read.csv(pfile, header = TRUE) 484 | #' snp <- read.snpData(gfile, sep = ",") 485 | #' clust <- qtcatClust(snp) 486 | #' geno <- qtcatGeno(snp, clust) 487 | #' pheno <- qtcatPheno(names = pdat[, 1], 488 | #' pheno = pdat[, 2], 489 | #' covariates = model.matrix(~ pdat[, 3])) 490 | #' fitted <- qtcatHit(pheno, geno) 491 | #' 492 | #' # Plot the selection frequncy of markers (loci37, loci260, and loci367 are causal) 493 | #' plotSelFreq(fitted) 494 | #' } 495 | #' 496 | #' @importFrom graphics plot axis mtext 497 | #' @export 498 | plotSelFreq <- function(x, xlab = "Chromosomes", ylab = "Sel. freq.", 499 | col.axis = NULL, ...) { 500 | stopifnot(is(x, "qtcatHit")) 501 | # make positions linear with gaps between chr's 502 | pos <- snpInfo(x)[, 1:2] 503 | chrminmax <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2)) 504 | chrgap <- sum(chrminmax[2L, ]) * .01 505 | chrsize <- cumsum(c(0, chrminmax[2L, -ncol(chrminmax)])) - 506 | cumsum(chrminmax[1L, ]) + 507 | cumsum(c(0, rep(chrgap, ncol(chrminmax) - 1L))) 508 | chr <- sort(unique(pos[, 1L])) 509 | for (i in seq_along(chr)) { 510 | inx <- which(pos[, 1L] == chr[i]) 511 | pos[inx, 2L] <- pos[inx, 2L] + chrsize[i] 512 | } 513 | chrstartend <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2)) 514 | xlim <- c(chrstartend[1L, 1L] - chrgap, chrstartend[2L, ncol(chrstartend)] + chrgap) 515 | # hit lasso selection fequency 516 | inx <- which(x$selectFreq > 0) 517 | selfreq <- cbind(pos[names(x$hier), 2L][inx], x$selectFreq[inx]) 518 | # hit lasso selection fequency plot 519 | plot(selfreq, xlim = xlim, ylim = c(0, 1.1), axes = FALSE, xlab = "", ylab = "", ...) 520 | # x 521 | for (i in seq_along(chr)) 522 | axis(1, labels = FALSE, at = c(chrstartend[1, i], chrstartend[2, i]), col = col.axis) 523 | axis(1, at = colMeans(chrstartend), labels = chr, col = NA, col.axis = col.axis) 524 | mtext(xlab, 1, 2.5, col = col.axis) 525 | # y 526 | axis(2, at = c(0, .5, 1), col = col.axis, col.axis = col.axis) 527 | mtext(ylab, 2, 2.5, col = col.axis) 528 | } 529 | -------------------------------------------------------------------------------- /R/qtcatPackage.R: -------------------------------------------------------------------------------- 1 | #' @title Quantitative Trait Cluster Association Test 2 | #' 3 | #' @description All SNPs are jointly associated to the phenotype and at the same time 4 | #' correlation among them is considered. Thus, correction for population structure becomes 5 | #' unnecessary, which in many cases results in a power advantages compared to other methods. 6 | #' 7 | #' @author Jonas Klasen 8 | #' 9 | #' @useDynLib qtcat 10 | #' @importFrom Rcpp evalCpp 11 | #' 12 | #' @docType package 13 | #' @name qtcat-package 14 | #' @aliases qtcat 15 | NULL 16 | -------------------------------------------------------------------------------- /R/snpCluster.R: -------------------------------------------------------------------------------- 1 | #' @title Correlation based distance between SNPs 2 | #' 3 | #' @description This function computes a distance matrix. The distance is estimated 4 | #' as one minus the absolute value of the correlation coefficient \code{1 - abs(cor)}. 5 | #' 6 | #' @param snp an object of class \linkS4class{snpMatrix}. 7 | #' 8 | #' @details See \code{\link[stats]{dist}} for details about the output object. 9 | #' @seealso \code{\link[stats]{dist}} 10 | #' 11 | #' @examples 12 | #' # file containing example data for SNP data 13 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 14 | #' snp <- read.snpData(gfile, sep = ",") 15 | #' 16 | #' dist <- distCor(snp[, 1:10]) 17 | #' 18 | #' @importFrom methods is 19 | #' @export 20 | distCor <- function(snp) { 21 | stopifnot(is(snp, "snpMatrix")) 22 | out <- corDists(snp@snpData) 23 | attr(out,"Labels") <- colnames(snp) 24 | attr(out,"Size") <- ncol(snp) 25 | attr(out,"Diag") <- FALSE 26 | attr(out,"Upper") <- FALSE 27 | attr(out,"method") <- "1-abs(cor(snp))" 28 | attr(out,"call") <- match.call() 29 | class(out) <- "dist" 30 | out 31 | } 32 | 33 | 34 | #' @title Perfect simiarity clusters of SNP 35 | #' 36 | #' @description Finds perfect similarity cluster of SNPs. This is specially usfull in 37 | #' artificial crossing populations. 38 | #' 39 | #' @param snp an object of class \linkS4class{snpMatrix}. 40 | #' @param mc.cores a positive integer for the number of cores for parallel computing. See 41 | #' \code{\link[parallel]{mclapply}} for details. 42 | #' 43 | #' @examples 44 | #' # file containing example data for SNP data 45 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 46 | #' snp <- read.snpData(gfile, sep = ",") 47 | #' 48 | #' ident <- identicals(snp) 49 | #' 50 | #' @importFrom parallel mclapply 51 | #' @importFrom methods is 52 | #' @importFrom stats optimise 53 | #' @export 54 | identicals <- function(snp, mc.cores = 1) { 55 | stopifnot(is(snp, "snpMatrix")) 56 | p <- ncol(snp@snpData) 57 | s <- optimise(function(s, p, m) p * s + p / s * (p / s - 1) / 2 * s / m, 58 | interval = c(2, p - 1), p = p, m = mc.cores)$minimum 59 | step <- as.integer(p / (s + 1)) 60 | preclust <- unlist(corPreIdenticals(snp@snpData, step), FALSE) 61 | kidenticals <- mclapply(preclust, function(i, snp) corIdenticals(snp, i), 62 | snp = snp@snpData, mc.cores = mc.cores) 63 | identclust <- joinCorIdenticals(p, preclust, kidenticals) 64 | clust <- identclust[[1L]] 65 | names(clust) <- colnames(snp) 66 | out <- list(clusters = clust, 67 | medoids = colnames(snp)[identclust[[2L]] + 1]) 68 | class(out) <- "identicals" 69 | out 70 | } 71 | 72 | 73 | #' @title K-medoids clustering of SNPs using randomized search 74 | #' 75 | #' @description Partitioning (clustering) into k clusters "around medoids" by randomized 76 | #' search. \code{1-abs(cor)} is used as distance between SNPs. 77 | #' 78 | #' @param snp an object of class \linkS4class{snpMatrix}. 79 | #' @param k a positive integer specifying the number of clusters, has to be greater than 80 | #' one and less than the number of SNPs. 81 | #' @param maxNeigbours a positive integer specifying the maximum number of randomized 82 | #' searches. 83 | #' @param nLocal a positive integer specifying the number of optimisation runs. 84 | #' @param mc.cores a positive integer for the number of cores for parallel computing. See 85 | #' \code{\link[parallel]{mclapply}} for details. 86 | #' 87 | #' @details The K-medoids clustering is implemented as clustering large applications based 88 | #' upon randomized search (CLARANS) algorithm (Ng and Han 2002). CLARANS is a modification 89 | #' of the partitioning around medoids (PAM) algorithm \code{\link[cluster]{pam}}. Where the 90 | #' PAM algorithm is estimating all distances between SNPs and the respective medoids, 91 | #' CLARANS is searching a random subset of the SNPs. This is independently repeated several 92 | #' times and the result which minimises the average distance the most is reported. This 93 | #' produces results close to those of the PAM algorithm (Ng and Han 2002), though the 94 | #' number of runs and the subset size have to be arbitrarily chosen by the user. The 95 | #' algorithm has two advantages: (i) the number of distance comparisons is dramatically 96 | #' reduced; and (ii) parallelizing is straightforward. 97 | #' 98 | #' @references Ng and J. Han (2002). CLARANS: A method for clustering objects for spatial 99 | #' data mining. \emph{IEEE Transactions on Knowledge and Data Engineering}. 100 | #' \url{http://dx.doi.org/10.1109/TKDE.2002.1033770}). 101 | #' 102 | #' @examples 103 | #' # file containing example data for SNP data 104 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 105 | #' snp <- read.snpData(gfile, sep = ",") 106 | #' 107 | #' clust <- clarans(snp, 3) 108 | #' 109 | #' @importFrom parallel mclapply 110 | #' @importFrom methods is 111 | #' @export 112 | clarans <- function(snp, k, maxNeigbours = 100, nLocal = 10, mc.cores = 1) { 113 | stopifnot(is(snp, "snpMatrix")) 114 | if (missing(k)) 115 | stop("'k' must be specifid") 116 | if (k < 2L) 117 | stop("'k' must be at least two") 118 | # cluster optimisation by clarans in parallel 119 | clarans.i <- function(i, snp, k, maxNeigbours) { 120 | # cluster optimisation by clarans 121 | out <- corClarans(snp@snpData, k, maxNeigbours) 122 | out 123 | } 124 | out.nLocal <- mclapply(1L:nLocal, clarans.i, 125 | snp, k, maxNeigbours, 126 | mc.cores = mc.cores) 127 | opt.func <- function(i, snp) {snp[[i]][[3L]]} 128 | all.objectives <- sapply(1:nLocal, opt.func, out.nLocal) 129 | out.opt <- out.nLocal[[which.min(all.objectives)]] 130 | clusters <- out.opt[[1L]] 131 | names(clusters) <- colnames(snp) 132 | medoids <- out.opt[[2L]] + 1 133 | names(medoids) <- colnames(snp)[medoids] 134 | # output 135 | out <- list(clusters = clusters, 136 | medoids = medoids, 137 | objective = out.opt[[3L]], 138 | all.objectives = all.objectives) 139 | class(out) <- "k-medoids" 140 | out 141 | } 142 | 143 | 144 | #' @title Hierarchical clustering for big SNP data sets. 145 | #' 146 | #' @description A three step approximated hierarchical clustering of SNPs suitable to 147 | #' large data sets. 148 | #' 149 | #' @param snp an object of class \linkS4class{snpMatrix}. 150 | #' @param k a positive integer specifying the number of clusters, less than the number of 151 | #' observations. 152 | #' @param identicals logical, if zero clustering. 153 | #' @param maxNeigbours a positive integer, specifying the maximum number of randomized 154 | #' searches. 155 | #' @param nLocal a positive integer, specifying the number of optimisation runs. Columns 156 | #' have to be similar to \code{snp}. 157 | #' @param method see hclust. 158 | #' @param mc.cores a number of cores for parallel computing. See \code{mclapply} in package 159 | #' parallel for details. 160 | #' @param trace logical, if \code{TRUE} it prints current status of the program. 161 | #' @param ... additional argruments for \code{\link[fastcluster]{hclust}} 162 | #' 163 | #' @seealso clarans 164 | #' 165 | #' @examples 166 | #' # file containing example data for SNP data 167 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 168 | #' snp <- read.snpData(gfile, sep = ",") 169 | #' 170 | #' clust <- qtcatClust(snp) 171 | #' 172 | #' @importFrom fastcluster hclust 173 | #' @importFrom stats as.dendrogram 174 | #' @importFrom parallel mclapply 175 | #' @importFrom methods is 176 | #' @importFrom utils installed.packages 177 | #' @export 178 | qtcatClust <- function(snp, k, identicals = TRUE, maxNeigbours = 100, nLocal = 10, 179 | method = "complete", mc.cores = 1, trace = FALSE, ...) { 180 | stopifnot(is(snp, "snpMatrix")) 181 | if (identicals) { 182 | # identicals 183 | if (trace) 184 | cat("Step 1: Search for identicals is running\n") 185 | identicalFit <- identicals(snp, mc.cores) 186 | snp <- snp[, identicalFit$medoids] 187 | } else if (trace) { 188 | cat("Step 1: Search for identicals is switch off\n") 189 | } 190 | # CLARANS 191 | if (missing(k)) 192 | k <- as.integer(ncol(snp) / 10000L) 193 | if (k >= 2L) { 194 | if (identicals && length(identicalFit$medoids) <= k * 2) 195 | stop("Number of medoids from pefect correlated clustering is < k * 2") 196 | if (trace) 197 | cat("Step 2: CLARANS is running, 'k' is:", k, "\n") 198 | clarFit <- clarans(snp, k, maxNeigbours, nLocal, mc.cores) 199 | if (trace) 200 | cat(" objectives:", 201 | format(clarFit$all.objectives, sort = TRUE, digits = 4), "\n") 202 | # if cluster < 2 add to a bigger cluster 203 | clust.inx <- seq_len(k) 204 | cluster.size <- rep(NA, k) 205 | for (i in clust.inx) 206 | cluster.size[i] <- sum(clarFit$clusters == i) 207 | smallclust <- which(cluster.size < 2) 208 | if (length(smallclust)) { 209 | clust.inx <- clust.inx[-smallclust] 210 | min.bigclust <- clust.inx[which.min(min(cluster.size[clust.inx]))] 211 | clarFit$clusters[clarFit$clusters %in% smallclust] <- min.bigclust 212 | } 213 | # HClust 214 | if (trace) 215 | cat("Step 3: HClust is running\n") 216 | hclust.sub <- function(i, snp, clarFit, method, ...) { 217 | inx.i <- which(clarFit$clusters == i) 218 | out <- as.dendrogram(hclust(distCor(snp[, inx.i]), method, ...)) 219 | out 220 | } 221 | hclustFit <- mclapply(clust.inx, hclust.sub, 222 | snp, clarFit, method, ..., 223 | mc.cores = mc.cores) 224 | dendro <- do.call(merge, c(hclustFit, height = 1, adjust = "add.max")) 225 | } else { 226 | # HClust 227 | if (trace) 228 | cat("Step 2: CLARANS is switch off\nStep 3: HClust is running\n") 229 | dendro <- as.dendrogram(hclust(distCor(snp), method, ...)) 230 | } 231 | if (identicals) { 232 | out <- list(dendrogram = dendro, 233 | clusters = identicalFit$clusters, 234 | medoids = identicalFit$medoids) 235 | } else { 236 | medos <- labels(dendro) 237 | clust <- 1:length(medos) 238 | names(clust) <- medos 239 | out <- list(dendrogram = dendro, 240 | clusters = clust, 241 | medoids = medos) 242 | } 243 | class(out) <- "qtcatClust" 244 | out 245 | } 246 | 247 | 248 | #' @title Cut a qtcatClust object 249 | #' 250 | #' @description Cut a qtcatClust object at an specific height. 251 | #' 252 | #' @param snp an object of class \linkS4class{snpMatrix}. 253 | #' @param snpClust an object of class \code{\link{qtcatClust}}. 254 | #' @param absCor a cutting height in absolute value of correlation. 255 | #' 256 | #' @examples 257 | #' # file containing example data for SNP data 258 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 259 | #' snp <- read.snpData(gfile, sep = ",") 260 | #' clust <- qtcatClust(snp) 261 | #' 262 | #' cclust <- cutClust(snp, clust, .5) 263 | #' 264 | #' @importFrom methods is 265 | #' @importFrom stats na.omit 266 | #' @export 267 | cutClust <- function(snp, snpClust, absCor = 1) { 268 | stopifnot(is(snp, "snpMatrix")) 269 | stopifnot(is(snpClust, "qtcatClust")) 270 | stopifnot(!missing(absCor)) 271 | dend <- snpClust$dendrogram 272 | clust <- snpClust$clusters 273 | if ((1 - absCor) >= attr(dend, "height")) { 274 | stop("'absCor' outside of range") 275 | } else { 276 | cut.dend <- cut(dend, h = 1 - absCor) 277 | clust.member <- function(i, dlist, clust) { 278 | namesclust <- names(clust)[clust %in% unique(clust[labels(dlist[[i]])])] 279 | clusti <- rep(i, length(namesclust)) 280 | names(clusti) <- namesclust 281 | return(clusti) 282 | } 283 | clust <- unlist(lapply(1:length(cut.dend$lower), clust.member, cut.dend$lower, clust)) 284 | medo <- names(clust)[corMedoids(snp[, names(clust)]@snpData, clust)] 285 | dend <- rename.leafs(cut.dend$upper, medo) 286 | clust <- clust[na.omit(match(colnames(snp), names(clust)))] 287 | } 288 | out <- list(dendrogram = dend, 289 | clusters = clust, 290 | medoids = medo) 291 | class(out) <- "qtcatClust" 292 | out 293 | } 294 | 295 | 296 | #' @title Rename dendrogram leafs 297 | #' 298 | #' @description Rename dendrogram leafs. 299 | #' 300 | #' @param dend a dendrogram. 301 | #' @param labels a vector of new names. 302 | #' 303 | #' @importFrom stats dendrapply is.leaf 304 | #' @keywords internal 305 | rename.leafs <- function(dend, labels){ 306 | dendlabel <- function(n) { 307 | if(is.leaf(n)) { 308 | i <<- i + 1L 309 | attr(n, "label") <- labels[i] 310 | } 311 | n 312 | } 313 | i <- 0L 314 | dendrapply(dend, dendlabel) 315 | } 316 | -------------------------------------------------------------------------------- /R/snpData.R: -------------------------------------------------------------------------------- 1 | #' @title Read SNP tables as a snpMatrix object 2 | #' 3 | #' @description Reads a file in table format and as a \linkS4class{snpMatrix} object. 4 | #' 5 | #' @param file the name of the file which the data are to be read from. If it does not 6 | #' contain an absolute path, the file name is relative to the current working directory, 7 | #' \code{getwd()}. Tilde-expansion is performed where supported. 8 | #' @param sep a field separator character. Values on each line of the file are separated 9 | #' by this character. 10 | #' @param quote the set of quoting characters. To disable quoting altogether, use 11 | #' \code{quote = ""}. 12 | #' @param na.string a string which is interpreted as NA value. 13 | #' @param nrows a integer, the maximum number of rows to read. 14 | #' 15 | #' @examples 16 | #' # file containing example data for SNP data 17 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 18 | #' snp <- read.snpData(gfile, sep = ",") 19 | #' 20 | #' @importFrom methods new 21 | #' @export 22 | read.snpData <- function(file, sep = " ", quote = "\"", 23 | na.string = "NA", nrows = -1L) { 24 | if (!file.exists(file)) 25 | stop("No such file or directory") 26 | file <- normalizePath(file) 27 | testRead <- strsplit(readLines(file, n = 2L), sep) 28 | if (sep != "") 29 | testRead <- lapply(testRead, function(x, sep) gsub(sep, '',x), sep = sep) 30 | if (length(testRead[[1L]]) <= 3L) 31 | stop("In line one the separator character 'sep' doesn't exist") 32 | if (length(testRead[[1L]]) != length(testRead[[2L]])) 33 | stop("Line one and two are of differnt length") 34 | if (!identical(unique(testRead[[1L]]), testRead[[1L]])) 35 | stop("Column names are not unique") 36 | firstCols <- tolower(substring(testRead[[1L]][1L:3L], 1L, 3L)) 37 | if (identical(firstCols, c("nam", "chr", "pos"))) { 38 | rowNames <- TRUE 39 | snp1 <- testRead[[2L]][-1L:-3L] 40 | } else if (identical(firstCols[1L:2L], c("chr", "pos"))) { 41 | rowNames <- FALSE 42 | snp1 <- testRead[[2L]][-1L:-2L] 43 | } else { 44 | stop("Either the first three columns contain 'names', 'chr', and 'pos' or alternatively the first two columns contain 'chr', and 'pos'") 45 | } 46 | if (any(nchar(snp1) != 2L)) 47 | stop("Every position in the SNP-matrix has to be specified by two characters, missing values are not allowed") 48 | if (na.string != "") { 49 | na.string <- as.character(na.string) 50 | na.string[is.na(na.string)] <- "NA" 51 | } 52 | temp <- read_snpData(file, sep, quote, rowNames, na.string, nrows) 53 | if (!length(temp$lociNames)) { 54 | lociNames <- paste0("loci", seq_len(ncol(temp$snpData))) 55 | } else { 56 | lociNames <- make.unique(temp$lociNames) 57 | } 58 | chr <- suppressWarnings(as.numeric(temp$chr)) 59 | if (any(is.na(chr))) 60 | chr <- temp$chr 61 | out <- new("snpMatrix", 62 | snpData = temp$snpData, 63 | snpInfo = data.frame(chr = chr, 64 | pos = temp$pos, 65 | allele = t(temp$alleles), 66 | row.names = lociNames), 67 | dim = dim(temp$snpData), 68 | dimnames = list(temp$indivNames, lociNames)) 69 | out 70 | } 71 | 72 | 73 | #' @title A snpMatrix constructor 74 | #' 75 | #' @description Constructs a \linkS4class{snpMatrix} object from the given data. 76 | #' 77 | #' @param x a matrix with individuals in rows and SNPs in columns. 78 | #' @param chr a vector with chromosoms at which SNPs are located. 79 | #' @param pos a vector with genomic positions at which SNPs are located. 80 | #' @param alleleCoding a coding scheme of \code{x} for hom (AA), het (AB), and hom (BB). 81 | #' @param allele.1 labels of allele one, for each SNP. 82 | #' @param allele.2 labels of allele two, for each SNP. 83 | #' 84 | #' @importFrom methods new 85 | #' @importFrom stats na.omit 86 | #' @export 87 | as.snpMatrix <- function(x, chr, pos, alleleCoding = c(-1, 0, 1), 88 | allele.1 = NULL, allele.2 = NULL) { 89 | if (!is.matrix(x)) 90 | x <- as.matrix(x) 91 | if (is.null(colnames(x))) 92 | stop("Column names are missing for 'x'") 93 | if (ncol(x) < 2L) 94 | stop("'x' has less than two columns") 95 | if (missing(chr)) 96 | stop("'chr' must be specified") 97 | if (missing(pos)) 98 | stop("'pos' must be specified") 99 | if (!is.vector(alleleCoding)) 100 | stop("'alleleCoding' is not a vector") 101 | x.allele <- na.omit(unique(c(x))) 102 | if (!all(c(x.allele %in% alleleCoding, alleleCoding %in% x.allele))) 103 | stop("'alleleCoding' do not match to 'x'") 104 | if (is.null(allele.1) || is.null(allele.2)) { 105 | allele.1 <- rep("A", ncol(x)) 106 | allele.2 <- rep("B", ncol(x)) 107 | } 108 | if (is.null(rownames(x))) { 109 | indiv.names <- paste0("indiv", seq_len(nrow(x))) 110 | } else { 111 | indiv.names <- rownames(x) 112 | } 113 | loci.names <- colnames(x) 114 | attr(x, 'dimnames') <- NULL 115 | nLabels <- length(alleleCoding) 116 | if (nLabels == 2L) { 117 | newLabels <- as.raw(c(1, 3)) 118 | } else if (nLabels == 3L) { 119 | newLabels <- as.raw(c(1, 2, 3)) 120 | } 121 | y <- matrix(raw(0), nrow(x), ncol(x)) 122 | for (i in 1:nLabels) 123 | y[alleleCoding[i] == x] <- newLabels[i] 124 | out <- new("snpMatrix", 125 | snpData = y, 126 | snpInfo = data.frame(chr = chr, 127 | pos = pos, 128 | allele.1 = allele.1, 129 | allele.2 = allele.2, 130 | row.names = loci.names), 131 | dim = dim(y), 132 | dimnames = list(indiv.names, loci.names)) 133 | out 134 | } 135 | 136 | 137 | #' @title Subsetting snpMatrix 138 | #' 139 | #' @description Subsetting an object of class \linkS4class{snpMatrix}. 140 | #' 141 | #' @param x an object of class \linkS4class{snpMatrix}. 142 | #' @param i a indices specifying elements to extract or replace. Indices are booleans, 143 | #' numeric or character vectors. 144 | #' @param j indices specifying elements to extract or replace. Indices are booleans, 145 | #' numeric or character vectors. 146 | #' @param ... not implemented. 147 | #' @param drop not implemented. 148 | #' 149 | #' @importFrom methods setMethod signature new 150 | #' @export 151 | setMethod("[", signature(x = "snpMatrix", i = "ANY", j = "ANY", drop = "missing"), 152 | function(x, i, j, ..., drop) { 153 | if (!missing(i)) { 154 | if (is.character(i)) { 155 | i <- match(i, rownames(x)) 156 | } 157 | } 158 | if (!missing(j)) { 159 | if (is.character(j)) { 160 | j <- match(j, colnames(x)) 161 | } 162 | } 163 | snpData <- x@snpData[i, j, drop = FALSE] 164 | out <- new("snpMatrix", 165 | snpData = snpData, 166 | snpInfo = x@snpInfo[j, ], 167 | dim = dim(snpData), 168 | dimnames = list(rownames(x)[i], colnames(x)[j])) 169 | out 170 | } 171 | ) 172 | 173 | 174 | #' @title As matrix method for snpMatrix 175 | #' 176 | #' @description As matrix method for an object of class \linkS4class{snpMatrix}. 177 | #' 178 | #' @param x an object of class \linkS4class{snpMatrix}. 179 | #' 180 | #' @examples 181 | #' # file containing example data for SNP data 182 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 183 | #' snp <- read.snpData(gfile, sep = ",") 184 | #' snpmat <- as.matrix(snp) 185 | #' 186 | #' @importFrom methods setMethod signature 187 | #' @export 188 | setMethod("as.matrix", signature(x = "snpMatrix"), 189 | function(x) { 190 | out <- design(x@snpData) 191 | dimnames(out) <- dimnames(x) 192 | out 193 | } 194 | ) 195 | 196 | 197 | #' @title Get position from snpMatrix 198 | #' 199 | #' @description Genetic position info from an object of class 200 | #' \linkS4class{snpMatrix}. 201 | #' 202 | #' @param object an object of class \linkS4class{snpMatrix}. 203 | #' 204 | #' @examples 205 | #' # file containing example data for SNP data 206 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 207 | #' snp <- read.snpData(gfile, sep = ",") 208 | #' info <- snpInfo(snp) 209 | #' 210 | #' @importFrom methods setMethod signature 211 | #' @export 212 | setMethod("snpInfo", signature(object = "snpMatrix"), 213 | function(object) { 214 | out <- object@snpInfo 215 | if (is.null(out)) { 216 | cat("No position information available") 217 | } 218 | out 219 | } 220 | ) 221 | 222 | 223 | #' @title Allele frequency 224 | #' 225 | #' @description Allele frequency an object of class \linkS4class{snpMatrix}. 226 | #' 227 | #' @param x an object of class \linkS4class{snpMatrix}. 228 | #' @param maf logical, if true minor allele frequency (default), other ways allele 229 | #' frequency of 'allele.1'. 230 | #' 231 | #' @examples 232 | #' # file containing example data for SNP data 233 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 234 | #' snp <- read.snpData(gfile, sep = ",") 235 | #' af <- alleleFreq(snp) 236 | #' 237 | #' @importFrom methods setMethod signature 238 | #' @export 239 | setMethod("alleleFreq", signature(x = "snpMatrix"), 240 | function(x, maf = TRUE) { 241 | out <- afreq(x@snpData, maf) 242 | names(out) <- colnames(x) 243 | out 244 | } 245 | ) 246 | 247 | 248 | #' @title Heterozygosity 249 | #' 250 | #' @description Heterozygosity an object of class \linkS4class{snpMatrix}. 251 | #' 252 | #' @param x an object of class \linkS4class{snpMatrix}. 253 | #' @param dim a integer for dimension. 1 (default) is for rows (individuals), 2 is for 254 | #' columns (SNPs). 255 | #' 256 | #' @examples 257 | #' # file containing example data for SNP data 258 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 259 | #' snp <- read.snpData(gfile, sep = ",") 260 | #' hf1 <- hetFreq(snp, 1) 261 | #' hf2 <- hetFreq(snp, 2) 262 | #' 263 | #' @importFrom methods setMethod signature 264 | #' @export 265 | setMethod("hetFreq", signature(x = "snpMatrix"), 266 | function(x, dim = 1) { 267 | if (dim != 1L && dim != 2L) 268 | stop("'dim' must be '1' or '2'") 269 | out <- hetfreq(x@snpData, dim) 270 | if (dim == 1L) 271 | names(out) <- rownames(x) 272 | else if (dim == 2L) 273 | names(out) <- colnames(x) 274 | return(out) 275 | } 276 | ) 277 | 278 | 279 | #' @title NA frequency 280 | #' 281 | #' @description NA frequency in an object of class \linkS4class{snpMatrix}. 282 | #' 283 | #' @param x an object of class \linkS4class{snpMatrix}. 284 | #' @param dim a integer for dimension. 1 (default) is for rows (individuals), 2 is for 285 | #' columns (SNPs). 286 | #' 287 | #' @examples 288 | #' # file containing example data for SNP data 289 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 290 | #' snp <- read.snpData(gfile, sep = ",") 291 | #' na1 <- naFreq(snp, 1) 292 | #' na2 <- naFreq(snp, 2) 293 | #' 294 | #' @importFrom methods setMethod signature 295 | #' @export 296 | setMethod("naFreq", signature(x = "snpMatrix"), 297 | function(x, dim = 1) { 298 | if (dim != 1L && dim != 2L) 299 | stop("'dim' must be '1' or '2'") 300 | out <- nafreq(x@snpData, dim) 301 | if (dim == 1L) 302 | names(out) <- rownames(x) 303 | else if (dim == 2L) 304 | names(out) <- colnames(x) 305 | return(out) 306 | } 307 | ) 308 | -------------------------------------------------------------------------------- /R/snpImpute.R: -------------------------------------------------------------------------------- 1 | #' @title Impute allele in formation to SNPs with missing data 2 | #' 3 | #' @description Uses neighbor SNPs from the clustering hierarchy to impute alleles to 4 | #' positions with missing values. 5 | #' 6 | #' @param snp an object of class \linkS4class{snpMatrix}. 7 | #' @param snpClust an object of class \code{\link{qtcatClust}}. 8 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this 9 | #' point in the hierarchy is reached, imputing is done via allele frequencies. 10 | #' @param mc.cores a number of cores for parallelising. Theoretical maximum is 11 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}. 12 | #' 13 | #' @examples 14 | #' # file containing example data for SNP data 15 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 16 | #' snp1 <- read.snpData(gfile, sep = ",") 17 | #' # delete SNP information from Matrix, 33.33% NAs (-> 66.67% SNP info) 18 | #' snp2 <- snp1 19 | #' nainx <- sample(1:length(snp2@snpData), length(snp2@snpData) / 3) 20 | #' snp2@snpData[nainx] <- as.raw(0) 21 | #' # clustering 22 | #' snp2clust <- qtcatClust(snp2) 23 | #' 24 | #' # imputing 25 | #' snp3 <- imputeSnpMatrix(snp2, snp2clust) 26 | #' # comparison of the full and the imputed data set 27 | #' snpmat1 <- as.matrix(snp1) 28 | #' snpmat3 <- as.matrix(snp3) 29 | #' (1 - sum(abs(snpmat1- snpmat3)) / length(snpmat1)) * 100 30 | #' 31 | #' @importFrom hit as.hierarchy 32 | #' @export 33 | imputeSnpMatrix <- function(snp, snpClust, min.absCor = .1, mc.cores = 1) { 34 | stopifnot(is(snp, "snpMatrix")) 35 | stopifnot(is(snpClust, "qtcatClust")) 36 | snpnames <- colnames(snp) 37 | hier <- as.hierarchy(snpClust$dendrogram, names = snpnames) 38 | snp <- imputeMedoids(snp, snpClust, hier, min.absCor, mc.cores) 39 | # impute non medoid SNPs (if exist) 40 | nonMedo <- which(!(names(snpClust$clusters) %in% snpClust$medoids)) 41 | if (length(nonMedo)) { 42 | flipAlleles <- as.numeric(alleleFreq(snp, FALSE) <= .5) 43 | snpList <- list() 44 | for (i in 1:ncol(snp)) { 45 | snpList[[i]] <- snp@snpData[, i] 46 | if (i %in% nonMedo) { 47 | m <- which(snpClust$medoids[snpClust$clusters[i]] == snpnames) 48 | js <- which(snpList[[i]] == is.raw(0)) 49 | jAllele <- snp@snpData[js, m] 50 | if (flipAlleles[i] != flipAlleles[m]) { 51 | j1 <- which(jAllele == as.raw(1)) 52 | j3 <- which(jAllele == as.raw(3)) 53 | jAllele[j1] <- as.raw(3) 54 | jAllele[j3] <- as.raw(1) 55 | } 56 | snpList[[i]][js] <- jAllele 57 | } 58 | } 59 | snp@snpData <- do.call(cbind, snpList) 60 | } 61 | snp 62 | } 63 | 64 | 65 | #' @title Impute missing information at medoid SNPs 66 | #' 67 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute alleles to 68 | #' positions with missing values at medoid SNPs. 69 | #' 70 | #' @param snp an object of class \linkS4class{snpMatrix}. 71 | #' @param snpClust an object of class \code{\link{qtcatClust}}. 72 | #' @param hier an object of class hierarchy. 73 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this 74 | #' point in the hierarchy is reached, imputing is done via allele frequencies. 75 | #' @param mc.cores a number of cores for parallelising. Theoretical maximum is 76 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}. 77 | #' 78 | #' @importFrom parallel mclapply 79 | #' @keywords internal 80 | imputeMedoids <- function(snp, snpClust, hier, min.absCor = .25, mc.cores = 1) { 81 | hierLeafs <- which(sapply(hier, function(x) is.null(attr(x, which = "subset")))) 82 | naSnps <- which(naFreq(snp, 2) > 0 & colnames(snp) %in% labels(hier)) 83 | medoSnps <- names(snpClust$clusters) %in% snpClust$medoids 84 | flipAlleles <- as.numeric(alleleFreq(snp, FALSE) <= .5) 85 | # run thru all SNPs 86 | snpList <- mclapply(1:ncol(snp), imputeSnp, 87 | snp, hier, hierLeafs, snpClust$clusters, medoSnps, 88 | naSnps, flipAlleles, min.absCor, 89 | mc.cores = mc.cores) 90 | snp@snpData <- do.call(cbind, snpList) 91 | snp 92 | } 93 | 94 | 95 | #' @title Impute missing information at a medoid SNPs from a group of neighbors 96 | #' 97 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute as many as 98 | #' possible alleles to positions with missing values at medoid SNPs. 99 | #' 100 | #' @param inxSnpOfInt a vertor of the snp of interest. 101 | #' @param snp an object of class \linkS4class{snpMatrix}. 102 | #' @param hier an object of class hierarchy. 103 | #' @param hierLeafs a vector of leafs of the hierarchy. 104 | #' @param clust a named vector of clusters. 105 | #' @param medoSnps a vector of medo turue o false. 106 | #' @param naSnps a vector of NA indeces. 107 | #' @param flipAlleles a vertor of telling for each SNP if allele one has allele freq. > 0.5 108 | #' or not. 109 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this 110 | #' point in the hierarchy is reached, imputing is done via allele frequencies. 111 | #' 112 | #' @keywords internal 113 | imputeSnp <- function(inxSnpOfInt, snp, hier, hierLeafs, clust, medoSnps, naSnps, 114 | flipAlleles, min.absCor) { 115 | snpOfInt <- snp@snpData[, inxSnpOfInt] 116 | if (medoSnps[inxSnpOfInt] && (inxSnpOfInt %in% naSnps)) { 117 | unsolved <- TRUE 118 | inxSnpsNotComp <- inxSnpOfInt 119 | # check in clusters of identicals 120 | inxSnpGrp <- which(clust == clust[inxSnpOfInt]) 121 | inxSnpsNotComp <- inxSnpGrp[!(inxSnpGrp %in% inxSnpsNotComp)] 122 | if (length(inxSnpsNotComp)) { 123 | temp <- imputeSnpIter(snp, snpOfInt, inxSnpsNotComp, 124 | flipAlleles[inxSnpOfInt], flipAlleles) 125 | snpOfInt <- temp[[1L]] 126 | unsolved <- temp[[2L]] 127 | } 128 | if (unsolved) { 129 | # run thru the heirarchy until NAs of the SNP are filled with information or the 130 | # height threshold is reached 131 | hierSnpOfInt <- hierLeafs[sapply(hier[hierLeafs], function(x) any(x == inxSnpOfInt))] 132 | super <- attr(hier[[hierSnpOfInt]], "superset") 133 | inxSnpGrp <- hier[[super]] 134 | h <- attr(inxSnpGrp, "height") 135 | while (unsolved && h <= (1 - min.absCor)) { 136 | inxSnpsNotComp <- c(inxSnpsNotComp, inxSnpsNotComp) 137 | inxSnpsNotComp <- inxSnpGrp[!(inxSnpGrp %in% inxSnpsNotComp)] 138 | if (length(inxSnpsNotComp)) { 139 | temp <- imputeSnpIter(snp, snpOfInt, inxSnpsNotComp, 140 | flipAlleles[inxSnpOfInt], flipAlleles) 141 | snpOfInt <- temp[[1L]] 142 | unsolved <- temp[[2L]] 143 | } 144 | super <- attr(hier[[super]], "superset") 145 | if (is.null(super)) 146 | break 147 | inxSnpGrp <- hier[[super]] 148 | h <- attr(inxSnpGrp, "height") 149 | } 150 | } 151 | # if height threshold is reached use alle frequency for random imputing 152 | if (unsolved) { 153 | js <- which(snpOfInt == as.raw(0L)) 154 | alleleNo <- table(as.integer(snpOfInt), exclude = 0L) 155 | alleles <- as.raw(names(alleleNo)) 156 | prob <- alleleNo / sum(alleleNo) 157 | snpOfInt[js] <- sample(alleles, length(js), TRUE, prob) 158 | } 159 | } 160 | snpOfInt 161 | } 162 | 163 | 164 | #' @title Impute missing information at a medoid SNPs from a group of neighbors 165 | #' 166 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute as many as 167 | #' possible alleles to positions with missing values at medoid SNPs. 168 | #' 169 | #' @param snp an object of class \linkS4class{snpMatrix}. 170 | #' @param snpOfInt a vertor of the snp of interest. 171 | #' @param inxSnpsToComp a index of neighbors. 172 | #' @param snpOfIntFlip flip status of the snp of interest. 173 | #' @param flipAlleles a vertor of telling for each SNP if allele one has allele freq. > 0.5 174 | #' or not. 175 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this 176 | #' point in the hierarchy is reached, imputing is done via allele frequencies. 177 | #' 178 | #' @keywords internal 179 | imputeSnpIter <- function(snp, snpOfInt, inxSnpsToComp, snpOfIntFlip, flipAlleles) { 180 | unsolved <- TRUE 181 | n <- length(inxSnpsToComp) 182 | i <- 1L 183 | while (unsolved && i <= n) { 184 | js <- which(snpOfInt == as.raw(0L)) 185 | jAllele <- snp@snpData[js, inxSnpsToComp[i]] 186 | if (snpOfIntFlip != flipAlleles[inxSnpsToComp[i]]) { 187 | j1 <- which(jAllele == as.raw(1L)) 188 | j3 <- which(jAllele == as.raw(3L)) 189 | jAllele[j1] <- as.raw(3L) 190 | jAllele[j3] <- as.raw(1L) 191 | } 192 | snpOfInt[js] <- jAllele 193 | unsolved <- any(jAllele == as.raw(0L)) 194 | i <- i + 1L 195 | } 196 | list(snpOfInt, unsolved) 197 | } 198 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |
3 | 4 | 5 | 6 |
7 | 8 | -------------------------------------------------------------------------------- 9 | 10 | # Quantitative Trait Cluster Association Test 11 | This project has no active development and is unmaintained. 12 | 13 | 14 | ## Description: 15 | All SNPs are jointly associated to the phenotype and at the same time correlation among 16 | them is considered. Thus, correction for population structure becomes unnecessary, which in 17 | many cases results in a power advantages compared to other methods. 18 | 19 | **Klasen, J. R. et al. (2016)**. *A multi-marker association method for genome-wide 20 | association studies without the need for population structure correction*. Nature 21 | Communications. [Paper](http://www.nature.com/articles/ncomms13299) 22 | 23 | ## Installation: 24 | The package can be installed from an R console via [`devtools`](https://github.com/hadley/devtools#updating-to-the-latest-version-of-devtools) 25 | (If you haven't yet installed `devtools` please do so first). 26 | 27 | ```R 28 | # install.packages("devtools") 29 | devtools::install_github("QTCAT/qtcat") 30 | 31 | ``` 32 | 33 | ## Example: 34 | The `qtcatQtc`-function example gives an overview of the functionality of 35 | the package and can be accessed once the package is loaded. 36 | 37 | ```R 38 | library(qtcat) 39 | example(qtcatQtc, run.dontrun = TRUE) 40 | 41 | ``` 42 | The data used in the example can be found under [inst/extdata](https://github.com/QTCAT/qtcat/tree/master/inst/extdata) 43 | 44 | There is also a [Arabidopsis example](https://github.com/QTCAT/qtcat.data) available. 45 | 46 | -------------------------------------------------------------------------------- 47 | 48 | [![License](https://img.shields.io/badge/license-GPL%20%28%3E=%202%29-brightgreen.svg)](https://www.gnu.org/licenses/gpl-2.0.html) 49 | © 2016 JR Klasen 50 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | # Adapt as necessary starting from here 14 | 15 | environment: 16 | global: 17 | WARNINGS_ARE_ERRORS: 1 18 | 19 | matrix: 20 | - R_VERSION: release 21 | R_ARCH: x64 22 | 23 | - R_VERSION: patched 24 | 25 | build_script: 26 | - travis-tool.sh install_deps 27 | - travis-tool.sh install_github QTCAT/hit 28 | 29 | test_script: 30 | - travis-tool.sh run_tests 31 | 32 | on_failure: 33 | - 7z a failure.zip *.Rcheck\* 34 | - appveyor PushArtifact failure.zip 35 | 36 | artifacts: 37 | - path: '*.Rcheck\**\*.log' 38 | name: Logs 39 | 40 | - path: '*.Rcheck\**\*.out' 41 | name: Logs 42 | 43 | - path: '*.Rcheck\**\*.fail' 44 | name: Logs 45 | 46 | - path: '*.Rcheck\**\*.Rout' 47 | name: Logs 48 | 49 | - path: '\*_*.tar.gz' 50 | name: Bits 51 | 52 | - path: '\*_*.zip' 53 | name: Bits 54 | -------------------------------------------------------------------------------- /inst/extdata/phenodata.csv: -------------------------------------------------------------------------------- 1 | name,phenotype,replication 2 | Indiv1,-0.712864882314292,I 3 | Indiv2,4.05700145305166,I 4 | Indiv3,2.68457752825909,I 5 | Indiv4,-0.978722003892421,I 6 | Indiv5,2.11833460559996,I 7 | Indiv6,4.69596379863478,I 8 | Indiv7,1.81492380500098,I 9 | Indiv8,8.33973545250998,I 10 | Indiv9,3.21727105571782,I 11 | Indiv10,1.75615681901381,I 12 | Indiv11,2.20340927227642,I 13 | Indiv12,3.29008788696637,I 14 | Indiv13,5.22242372780827,I 15 | Indiv14,3.9616977925181,I 16 | Indiv15,6.4184630775582,I 17 | Indiv16,2.52617864225462,I 18 | Indiv17,2.71675134747193,I 19 | Indiv18,9.08630499778771,I 20 | Indiv19,3.01776165508853,I 21 | Indiv20,2.12967794232666,I 22 | Indiv21,6.19881599227966,I 23 | Indiv22,2.60912725791431,I 24 | Indiv23,6.74137903134634,I 25 | Indiv24,5.43854561008488,I 26 | Indiv25,5.63126674897179,I 27 | Indiv26,5.3700172742071,I 28 | Indiv27,2.97767963958376,I 29 | Indiv28,5.02236976056541,I 30 | Indiv29,8.73017558942648,I 31 | Indiv30,7.67488336483054,I 32 | Indiv31,4.41931825555875,I 33 | Indiv32,5.76329762287536,I 34 | Indiv33,5.69297223264722,I 35 | Indiv34,4.43795994668944,I 36 | Indiv35,0.546417450296849,I 37 | Indiv36,5.14011399361257,I 38 | Indiv37,5.06931125177772,I 39 | Indiv38,2.6091396053827,I 40 | Indiv39,5.27906736993309,I 41 | Indiv40,7.59780299611099,I 42 | Indiv41,3.6971545640507,I 43 | Indiv42,4.30324604452561,I 44 | Indiv43,3.59785828183773,I 45 | Indiv44,5.09832009873271,I 46 | Indiv45,3.76780907857732,I 47 | Indiv46,0.844484552582176,I 48 | Indiv47,4.74916534660669,I 49 | Indiv48,5.67195293439532,I 50 | Indiv49,2.93964234735589,I 51 | Indiv50,4.90681722956387,I 52 | Indiv51,3.74911685226859,I 53 | Indiv52,1.92666742002455,I 54 | Indiv53,4.60404498058726,I 55 | Indiv54,3.53578869920731,I 56 | Indiv55,4.36984245698348,I 57 | Indiv56,3.26681032690425,I 58 | Indiv57,4.18302568636384,I 59 | Indiv58,3.70971187197379,I 60 | Indiv59,2.27999270087654,I 61 | Indiv60,3.05181399805921,I 62 | Indiv61,2.43549816911518,I 63 | Indiv62,2.34800409902198,I 64 | Indiv63,5.31821730546112,I 65 | Indiv64,-0.713122051020195,I 66 | Indiv65,4.50338394253097,I 67 | Indiv66,4.62922637029452,I 68 | Indiv67,2.24256710689108,I 69 | Indiv68,3.59909039731827,I 70 | Indiv69,6.20966957535184,I 71 | Indiv70,4.00384637556956,I 72 | Indiv71,2.6824335168794,I 73 | Indiv72,6.83166810179122,I 74 | Indiv73,2.49389386433214,I 75 | Indiv74,2.96109283271708,I 76 | Indiv75,5.90123212589518,I 77 | Indiv76,4.42447675875507,I 78 | Indiv77,4.96833384873987,I 79 | Indiv78,7.12738639539622,I 80 | Indiv79,4.42632757597075,I 81 | Indiv80,5.27826128836297,I 82 | Indiv81,4.2832569668242,I 83 | Indiv82,2.35271359835162,I 84 | Indiv83,6.75091008120157,I 85 | Indiv84,3.39601817943395,I 86 | Indiv85,0.792062124318047,I 87 | Indiv86,4.49112716920309,I 88 | Indiv87,5.48080731018814,I 89 | Indiv88,5.53831911353581,I 90 | Indiv89,3.8073404958687,I 91 | Indiv90,8.11792267567341,I 92 | Indiv91,5.24450049724668,I 93 | Indiv92,8.06636067197744,I 94 | Indiv93,3.76240790598956,I 95 | Indiv94,4.57106970441781,I 96 | Indiv95,0.930562501758845,I 97 | Indiv96,5.89207888898974,I 98 | Indiv97,4.76510770162231,I 99 | Indiv98,6.13315220689909,I 100 | Indiv99,5.15975639021241,I 101 | Indiv100,4.97858970802809,I 102 | Indiv1,0.176206906929316,II 103 | Indiv2,3.85129791007974,II 104 | Indiv3,0.89679511088741,II 105 | Indiv4,3.50501419315986,II 106 | Indiv5,2.74144800827057,II 107 | Indiv6,2.89870296149524,II 108 | Indiv7,2.83813356575242,II 109 | Indiv8,6.69393926686158,II 110 | Indiv9,0.445190879937969,II 111 | Indiv10,1.81268165056905,II 112 | Indiv11,4.18471268209546,II 113 | Indiv12,3.54037632849923,II 114 | Indiv13,7.54041530100953,II 115 | Indiv14,5.86490355924926,II 116 | Indiv15,0.541745439242708,II 117 | Indiv16,2.69319603761256,II 118 | Indiv17,4.212108002269,II 119 | Indiv18,5.48944504498334,II 120 | Indiv19,4.55532775386045,II 121 | Indiv20,4.42315823029467,II 122 | Indiv21,4.20476815196306,II 123 | Indiv22,7.19077440353425,II 124 | Indiv23,7.85927666811885,II 125 | Indiv24,4.7649811402712,II 126 | Indiv25,5.33833106226095,II 127 | Indiv26,9.51369408166689,II 128 | Indiv27,2.92203513544632,II 129 | Indiv28,6.32725491154529,II 130 | Indiv29,6.95849521528492,II 131 | Indiv30,4.49056269636047,II 132 | Indiv31,4.64224668507529,II 133 | Indiv32,6.49903099179706,II 134 | Indiv33,2.76900451424559,II 135 | Indiv34,5.10198462028238,II 136 | Indiv35,1.33235508902282,II 137 | Indiv36,3.99616006339745,II 138 | Indiv37,6.9482881756113,II 139 | Indiv38,2.21201425259469,II 140 | Indiv39,3.30168447239249,II 141 | Indiv40,5.70770160656621,II 142 | Indiv41,5.51655628789299,II 143 | Indiv42,3.22099848037914,II 144 | Indiv43,6.83821240540428,II 145 | Indiv44,1.05611152692071,II 146 | Indiv45,2.3793891845802,II 147 | Indiv46,3.486717253706,II 148 | Indiv47,4.45135169448376,II 149 | Indiv48,1.70633725047442,II 150 | Indiv49,7.78933321655033,II 151 | Indiv50,5.93711762234058,II 152 | Indiv51,2.63282544384585,II 153 | Indiv52,5.12336178663091,II 154 | Indiv53,3.13670189353915,II 155 | Indiv54,0.575601823259657,II 156 | Indiv55,5.3577809898076,II 157 | Indiv56,3.64994372110131,II 158 | Indiv57,1.95200830822741,II 159 | Indiv58,2.20290632954696,II 160 | Indiv59,3.98471425274528,II 161 | Indiv60,1.46737912491768,II 162 | Indiv61,2.92074054315639,II 163 | Indiv62,3.92929791664308,II 164 | Indiv63,2.19530471281698,II 165 | Indiv64,1.13804915005779,II 166 | Indiv65,4.15958191761742,II 167 | Indiv66,3.99613481122336,II 168 | Indiv67,1.52917842389308,II 169 | Indiv68,3.82975810221317,II 170 | Indiv69,2.06278495341624,II 171 | Indiv70,5.73101019063431,II 172 | Indiv71,5.43947438490607,II 173 | Indiv72,3.03603922924573,II 174 | Indiv73,2.288226558559,II 175 | Indiv74,2.09090076017277,II 176 | Indiv75,5.00102930990027,II 177 | Indiv76,5.97477767911573,II 178 | Indiv77,5.70601446968208,II 179 | Indiv78,5.02323274457706,II 180 | Indiv79,5.44894123457085,II 181 | Indiv80,5.43591657545378,II 182 | Indiv81,2.51784481532873,II 183 | Indiv82,3.10671706906678,II 184 | Indiv83,8.72487282279824,II 185 | Indiv84,1.27498342463781,II 186 | Indiv85,3.7876125919506,II 187 | Indiv86,5.12474566303836,II 188 | Indiv87,5.2718274186796,II 189 | Indiv88,7.85916592955344,II 190 | Indiv89,4.91608517979929,II 191 | Indiv90,5.01900455357883,II 192 | Indiv91,4.07100812407142,II 193 | Indiv92,6.6107211538395,II 194 | Indiv93,3.51727716642754,II 195 | Indiv94,4.1753329858646,II 196 | Indiv95,3.0353098664466,II 197 | Indiv96,3.32270339015034,II 198 | Indiv97,6.26606071672542,II 199 | Indiv98,7.52163666481343,II 200 | Indiv99,3.20389084994409,II 201 | Indiv100,5.63788046229403,II 202 | Indiv1,2.02029439261033,III 203 | Indiv2,1.3310022466908,III 204 | Indiv3,1.88109268308407,III 205 | Indiv4,2.0747606653117,III 206 | Indiv5,0.71136509236296,III 207 | Indiv6,5.69396750256412,III 208 | Indiv7,3.88933752587519,III 209 | Indiv8,5.40513402423043,III 210 | Indiv9,2.9082007446537,III 211 | Indiv10,5.01449395435981,III 212 | Indiv11,0.404922306483191,III 213 | Indiv12,5.16241985621497,III 214 | Indiv13,6.30167091059181,III 215 | Indiv14,3.02761398080251,III 216 | Indiv15,4.78362934229522,III 217 | Indiv16,2.62219124504208,III 218 | Indiv17,2.20120211280138,III 219 | Indiv18,7.81735976730744,III 220 | Indiv19,5.47309512628714,III 221 | Indiv20,0.326478522987693,III 222 | Indiv21,4.17417013688955,III 223 | Indiv22,4.6634139839633,III 224 | Indiv23,5.11293773267319,III 225 | Indiv24,3.83476050228082,III 226 | Indiv25,6.10600197974025,III 227 | Indiv26,4.88123830253315,III 228 | Indiv27,5.93398647240647,III 229 | Indiv28,6.79058601815249,III 230 | Indiv29,4.08974418429866,III 231 | Indiv30,6.36966316478642,III 232 | Indiv31,6.77187881311789,III 233 | Indiv32,4.2356086186249,III 234 | Indiv33,1.89829225573216,III 235 | Indiv34,8.16035874246949,III 236 | Indiv35,-0.56002492101901,III 237 | Indiv36,4.5986854641122,III 238 | Indiv37,6.90187522768069,III 239 | Indiv38,0.209604370609421,III 240 | Indiv39,4.77510809790155,III 241 | Indiv40,8.50368809488841,III 242 | Indiv41,3.08657316770405,III 243 | Indiv42,3.52485436085774,III 244 | Indiv43,5.98808046655028,III 245 | Indiv44,1.16351360622373,III 246 | Indiv45,7.69353448228304,III 247 | Indiv46,4.93097609274219,III 248 | Indiv47,3.15220320609129,III 249 | Indiv48,2.68712804101995,III 250 | Indiv49,7.71421329580518,III 251 | Indiv50,2.87368671700662,III 252 | Indiv51,3.94179831275021,III 253 | Indiv52,5.10124526695668,III 254 | Indiv53,2.92799161101486,III 255 | Indiv54,0.96951652051614,III 256 | Indiv55,6.14384764255529,III 257 | Indiv56,2.73228444251163,III 258 | Indiv57,4.41322465728952,III 259 | Indiv58,6.15032268108312,III 260 | Indiv59,0.877839915335282,III 261 | Indiv60,2.92317102138906,III 262 | Indiv61,1.94411888999608,III 263 | Indiv62,0.188646943059274,III 264 | Indiv63,3.99239953854629,III 265 | Indiv64,4.5826438337587,III 266 | Indiv65,2.52512157516696,III 267 | Indiv66,7.04756283086153,III 268 | Indiv67,3.35134198261861,III 269 | Indiv68,3.26565145307141,III 270 | Indiv69,4.13587004514256,III 271 | Indiv70,6.50598959703055,III 272 | Indiv71,1.21868886446736,III 273 | Indiv72,3.63896545676015,III 274 | Indiv73,4.93741312391645,III 275 | Indiv74,-1.87541241257004,III 276 | Indiv75,3.31873034768445,III 277 | Indiv76,4.52245145391965,III 278 | Indiv77,5.11374257847258,III 279 | Indiv78,5.48074339885391,III 280 | Indiv79,6.41669576635036,III 281 | Indiv80,2.89227943325531,III 282 | Indiv81,4.59381627240809,III 283 | Indiv82,5.33877112921947,III 284 | Indiv83,5.36755681458729,III 285 | Indiv84,2.69754684643688,III 286 | Indiv85,1.13013661082002,III 287 | Indiv86,5.16369038077365,III 288 | Indiv87,3.88016289688369,III 289 | Indiv88,7.60604371237198,III 290 | Indiv89,1.74340317670462,III 291 | Indiv90,7.43364608481622,III 292 | Indiv91,5.57022305368588,III 293 | Indiv92,4.62469210352858,III 294 | Indiv93,4.84210640577904,III 295 | Indiv94,3.54461123156264,III 296 | Indiv95,-0.0820107650207224,III 297 | Indiv96,5.4477657365215,III 298 | Indiv97,7.78709432577761,III 299 | Indiv98,5.84336889734014,III 300 | Indiv99,4.62500167547687,III 301 | Indiv100,4.9071448387676,III 302 | -------------------------------------------------------------------------------- /man/alleleFreq-snpMatrix-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpData.R 3 | \docType{methods} 4 | \name{alleleFreq,snpMatrix-method} 5 | \alias{alleleFreq,snpMatrix-method} 6 | \title{Allele frequency} 7 | \usage{ 8 | \S4method{alleleFreq}{snpMatrix}(x, maf = TRUE) 9 | } 10 | \arguments{ 11 | \item{x}{an object of class \linkS4class{snpMatrix}.} 12 | 13 | \item{maf}{logical, if true minor allele frequency (default), other ways allele 14 | frequency of 'allele.1'.} 15 | } 16 | \description{ 17 | Allele frequency an object of class \linkS4class{snpMatrix}. 18 | } 19 | \examples{ 20 | # file containing example data for SNP data 21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 22 | snp <- read.snpData(gfile, sep = ",") 23 | af <- alleleFreq(snp) 24 | 25 | } 26 | -------------------------------------------------------------------------------- /man/alleleFreq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGeneric.R 3 | \name{alleleFreq} 4 | \alias{alleleFreq} 5 | \title{Allele Frequency.} 6 | \usage{ 7 | alleleFreq(x, maf = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{an object, for which a corresponding method exists.} 11 | 12 | \item{maf}{logical, if true minor allele frequency (default), other ways allele 13 | frequency of 'allele.1'.} 14 | } 15 | \description{ 16 | Frequency of alleles in data set. 17 | } 18 | -------------------------------------------------------------------------------- /man/as.matrix-snpMatrix-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpData.R 3 | \docType{methods} 4 | \name{as.matrix,snpMatrix-method} 5 | \alias{as.matrix,snpMatrix-method} 6 | \title{As matrix method for snpMatrix} 7 | \usage{ 8 | \S4method{as.matrix}{snpMatrix}(x) 9 | } 10 | \arguments{ 11 | \item{x}{an object of class \linkS4class{snpMatrix}.} 12 | } 13 | \description{ 14 | As matrix method for an object of class \linkS4class{snpMatrix}. 15 | } 16 | \examples{ 17 | # file containing example data for SNP data 18 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 19 | snp <- read.snpData(gfile, sep = ",") 20 | snpmat <- as.matrix(snp) 21 | 22 | } 23 | -------------------------------------------------------------------------------- /man/as.snpMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpData.R 3 | \name{as.snpMatrix} 4 | \alias{as.snpMatrix} 5 | \title{A snpMatrix constructor} 6 | \usage{ 7 | as.snpMatrix(x, chr, pos, alleleCoding = c(-1, 0, 1), allele.1 = NULL, 8 | allele.2 = NULL) 9 | } 10 | \arguments{ 11 | \item{x}{a matrix with individuals in rows and SNPs in columns.} 12 | 13 | \item{chr}{a vector with chromosoms at which SNPs are located.} 14 | 15 | \item{pos}{a vector with genomic positions at which SNPs are located.} 16 | 17 | \item{alleleCoding}{a coding scheme of \code{x} for hom (AA), het (AB), and hom (BB).} 18 | 19 | \item{allele.1}{labels of allele one, for each SNP.} 20 | 21 | \item{allele.2}{labels of allele two, for each SNP.} 22 | } 23 | \description{ 24 | Constructs a \linkS4class{snpMatrix} object from the given data. 25 | } 26 | -------------------------------------------------------------------------------- /man/clarans.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpCluster.R 3 | \name{clarans} 4 | \alias{clarans} 5 | \title{K-medoids clustering of SNPs using randomized search} 6 | \usage{ 7 | clarans(snp, k, maxNeigbours = 100, nLocal = 10, mc.cores = 1) 8 | } 9 | \arguments{ 10 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 11 | 12 | \item{k}{a positive integer specifying the number of clusters, has to be greater than 13 | one and less than the number of SNPs.} 14 | 15 | \item{maxNeigbours}{a positive integer specifying the maximum number of randomized 16 | searches.} 17 | 18 | \item{nLocal}{a positive integer specifying the number of optimisation runs.} 19 | 20 | \item{mc.cores}{a positive integer for the number of cores for parallel computing. See 21 | \code{\link[parallel]{mclapply}} for details.} 22 | } 23 | \description{ 24 | Partitioning (clustering) into k clusters "around medoids" by randomized 25 | search. \code{1-abs(cor)} is used as distance between SNPs. 26 | } 27 | \details{ 28 | The K-medoids clustering is implemented as clustering large applications based 29 | upon randomized search (CLARANS) algorithm (Ng and Han 2002). CLARANS is a modification 30 | of the partitioning around medoids (PAM) algorithm \code{\link[cluster]{pam}}. Where the 31 | PAM algorithm is estimating all distances between SNPs and the respective medoids, 32 | CLARANS is searching a random subset of the SNPs. This is independently repeated several 33 | times and the result which minimises the average distance the most is reported. This 34 | produces results close to those of the PAM algorithm (Ng and Han 2002), though the 35 | number of runs and the subset size have to be arbitrarily chosen by the user. The 36 | algorithm has two advantages: (i) the number of distance comparisons is dramatically 37 | reduced; and (ii) parallelizing is straightforward. 38 | } 39 | \examples{ 40 | # file containing example data for SNP data 41 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 42 | snp <- read.snpData(gfile, sep = ",") 43 | 44 | clust <- clarans(snp, 3) 45 | 46 | } 47 | \references{ 48 | Ng and J. Han (2002). CLARANS: A method for clustering objects for spatial 49 | data mining. \emph{IEEE Transactions on Knowledge and Data Engineering}. 50 | \url{http://dx.doi.org/10.1109/TKDE.2002.1033770}). 51 | } 52 | -------------------------------------------------------------------------------- /man/cutClust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpCluster.R 3 | \name{cutClust} 4 | \alias{cutClust} 5 | \title{Cut a qtcatClust object} 6 | \usage{ 7 | cutClust(snp, snpClust, absCor = 1) 8 | } 9 | \arguments{ 10 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 11 | 12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.} 13 | 14 | \item{absCor}{a cutting height in absolute value of correlation.} 15 | } 16 | \description{ 17 | Cut a qtcatClust object at an specific height. 18 | } 19 | \examples{ 20 | # file containing example data for SNP data 21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 22 | snp <- read.snpData(gfile, sep = ",") 23 | clust <- qtcatClust(snp) 24 | 25 | cclust <- cutClust(snp, clust, .5) 26 | 27 | } 28 | -------------------------------------------------------------------------------- /man/distCor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpCluster.R 3 | \name{distCor} 4 | \alias{distCor} 5 | \title{Correlation based distance between SNPs} 6 | \usage{ 7 | distCor(snp) 8 | } 9 | \arguments{ 10 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 11 | } 12 | \description{ 13 | This function computes a distance matrix. The distance is estimated 14 | as one minus the absolute value of the correlation coefficient \code{1 - abs(cor)}. 15 | } 16 | \details{ 17 | See \code{\link[stats]{dist}} for details about the output object. 18 | } 19 | \examples{ 20 | # file containing example data for SNP data 21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 22 | snp <- read.snpData(gfile, sep = ",") 23 | 24 | dist <- distCor(snp[, 1:10]) 25 | 26 | } 27 | \seealso{ 28 | \code{\link[stats]{dist}} 29 | } 30 | -------------------------------------------------------------------------------- /man/hetFreq-snpMatrix-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpData.R 3 | \docType{methods} 4 | \name{hetFreq,snpMatrix-method} 5 | \alias{hetFreq,snpMatrix-method} 6 | \title{Heterozygosity} 7 | \usage{ 8 | \S4method{hetFreq}{snpMatrix}(x, dim = 1) 9 | } 10 | \arguments{ 11 | \item{x}{an object of class \linkS4class{snpMatrix}.} 12 | 13 | \item{dim}{a integer for dimension. 1 (default) is for rows (individuals), 2 is for 14 | columns (SNPs).} 15 | } 16 | \description{ 17 | Heterozygosity an object of class \linkS4class{snpMatrix}. 18 | } 19 | \examples{ 20 | # file containing example data for SNP data 21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 22 | snp <- read.snpData(gfile, sep = ",") 23 | hf1 <- hetFreq(snp, 1) 24 | hf2 <- hetFreq(snp, 2) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /man/hetFreq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGeneric.R 3 | \name{hetFreq} 4 | \alias{hetFreq} 5 | \title{Heterozygosity Frequency.} 6 | \usage{ 7 | hetFreq(x, dim = 1L) 8 | } 9 | \arguments{ 10 | \item{x}{an object, for which a corresponding method exists.} 11 | 12 | \item{dim}{dimension over which heterozygosity is estimated. 1 (default) is for rows 13 | (individuals), 2 is for columns (SNPs).} 14 | } 15 | \description{ 16 | Frequency of heterozygosity in data set. 17 | } 18 | -------------------------------------------------------------------------------- /man/identicals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpCluster.R 3 | \name{identicals} 4 | \alias{identicals} 5 | \title{Perfect simiarity clusters of SNP} 6 | \usage{ 7 | identicals(snp, mc.cores = 1) 8 | } 9 | \arguments{ 10 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 11 | 12 | \item{mc.cores}{a positive integer for the number of cores for parallel computing. See 13 | \code{\link[parallel]{mclapply}} for details.} 14 | } 15 | \description{ 16 | Finds perfect similarity cluster of SNPs. This is specially usfull in 17 | artificial crossing populations. 18 | } 19 | \examples{ 20 | # file containing example data for SNP data 21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 22 | snp <- read.snpData(gfile, sep = ",") 23 | 24 | ident <- identicals(snp) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /man/imputeMedoids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpImpute.R 3 | \name{imputeMedoids} 4 | \alias{imputeMedoids} 5 | \title{Impute missing information at medoid SNPs} 6 | \usage{ 7 | imputeMedoids(snp, snpClust, hier, min.absCor = 0.25, mc.cores = 1) 8 | } 9 | \arguments{ 10 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 11 | 12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.} 13 | 14 | \item{hier}{an object of class hierarchy.} 15 | 16 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this 17 | point in the hierarchy is reached, imputing is done via allele frequencies.} 18 | 19 | \item{mc.cores}{a number of cores for parallelising. Theoretical maximum is 20 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.} 21 | } 22 | \description{ 23 | Uses neighboring SNPs in the clustering hierarchy to impute alleles to 24 | positions with missing values at medoid SNPs. 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/imputeSnp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpImpute.R 3 | \name{imputeSnp} 4 | \alias{imputeSnp} 5 | \title{Impute missing information at a medoid SNPs from a group of neighbors} 6 | \usage{ 7 | imputeSnp(inxSnpOfInt, snp, hier, hierLeafs, clust, medoSnps, naSnps, 8 | flipAlleles, min.absCor) 9 | } 10 | \arguments{ 11 | \item{inxSnpOfInt}{a vertor of the snp of interest.} 12 | 13 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 14 | 15 | \item{hier}{an object of class hierarchy.} 16 | 17 | \item{hierLeafs}{a vector of leafs of the hierarchy.} 18 | 19 | \item{clust}{a named vector of clusters.} 20 | 21 | \item{medoSnps}{a vector of medo turue o false.} 22 | 23 | \item{naSnps}{a vector of NA indeces.} 24 | 25 | \item{flipAlleles}{a vertor of telling for each SNP if allele one has allele freq. > 0.5 26 | or not.} 27 | 28 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this 29 | point in the hierarchy is reached, imputing is done via allele frequencies.} 30 | } 31 | \description{ 32 | Uses neighboring SNPs in the clustering hierarchy to impute as many as 33 | possible alleles to positions with missing values at medoid SNPs. 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/imputeSnpIter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpImpute.R 3 | \name{imputeSnpIter} 4 | \alias{imputeSnpIter} 5 | \title{Impute missing information at a medoid SNPs from a group of neighbors} 6 | \usage{ 7 | imputeSnpIter(snp, snpOfInt, inxSnpsToComp, snpOfIntFlip, flipAlleles) 8 | } 9 | \arguments{ 10 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 11 | 12 | \item{snpOfInt}{a vertor of the snp of interest.} 13 | 14 | \item{inxSnpsToComp}{a index of neighbors.} 15 | 16 | \item{snpOfIntFlip}{flip status of the snp of interest.} 17 | 18 | \item{flipAlleles}{a vertor of telling for each SNP if allele one has allele freq. > 0.5 19 | or not.} 20 | 21 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this 22 | point in the hierarchy is reached, imputing is done via allele frequencies.} 23 | } 24 | \description{ 25 | Uses neighboring SNPs in the clustering hierarchy to impute as many as 26 | possible alleles to positions with missing values at medoid SNPs. 27 | } 28 | \keyword{internal} 29 | -------------------------------------------------------------------------------- /man/imputeSnpMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpImpute.R 3 | \name{imputeSnpMatrix} 4 | \alias{imputeSnpMatrix} 5 | \title{Impute allele in formation to SNPs with missing data} 6 | \usage{ 7 | imputeSnpMatrix(snp, snpClust, min.absCor = 0.1, mc.cores = 1) 8 | } 9 | \arguments{ 10 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 11 | 12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.} 13 | 14 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this 15 | point in the hierarchy is reached, imputing is done via allele frequencies.} 16 | 17 | \item{mc.cores}{a number of cores for parallelising. Theoretical maximum is 18 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.} 19 | } 20 | \description{ 21 | Uses neighbor SNPs from the clustering hierarchy to impute alleles to 22 | positions with missing values. 23 | } 24 | \examples{ 25 | # file containing example data for SNP data 26 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 27 | snp1 <- read.snpData(gfile, sep = ",") 28 | # delete SNP information from Matrix, 33.33\% NAs (-> 66.67\% SNP info) 29 | snp2 <- snp1 30 | nainx <- sample(1:length(snp2@snpData), length(snp2@snpData) / 3) 31 | snp2@snpData[nainx] <- as.raw(0) 32 | # clustering 33 | snp2clust <- qtcatClust(snp2) 34 | 35 | # imputing 36 | snp3 <- imputeSnpMatrix(snp2, snp2clust) 37 | # comparison of the full and the imputed data set 38 | snpmat1 <- as.matrix(snp1) 39 | snpmat3 <- as.matrix(snp3) 40 | (1 - sum(abs(snpmat1- snpmat3)) / length(snpmat1)) * 100 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/lmQtc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \name{lmQtc} 4 | \alias{lmQtc} 5 | \title{Fitting a Linear Model to QTCs} 6 | \usage{ 7 | lmQtc(object, pheno, geno, alpha = 0.05, min.absCor = 0.05) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{\link{qtcatHit}}.} 11 | 12 | \item{pheno}{an object of class \code{\link{qtcatPheno}}.} 13 | 14 | \item{geno}{an object of class \code{\link{qtcatGeno}}.} 15 | 16 | \item{alpha}{an alpha level for significance estimation.} 17 | 18 | \item{min.absCor}{minimum absolute value of correlation considered.} 19 | } 20 | \description{ 21 | Linear model between phenotype and medoids of QTCs (significant SNP 22 | clusters). 23 | } 24 | \examples{ 25 | # If you want to run the examples, use: 26 | # example(lmQtc, run.dontrun = TRUE) 27 | \dontrun{ 28 | # files containing example data for SNP data and the phenotype 29 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 31 | pdat <- read.csv(pfile, header = TRUE) 32 | snp <- read.snpData(gfile, sep = ",") 33 | clust <- qtcatClust(snp) 34 | geno <- qtcatGeno(snp, clust) 35 | pheno <- qtcatPheno(names = pdat[, 1], 36 | pheno = pdat[, 2], 37 | covariates = model.matrix(~ pdat[, 3])) 38 | fitted <- qtcatHit(pheno, geno) 39 | 40 | # fitting a LM to the phenotype and QTC medoids 41 | lmfitted <- lmQtc(fitted, pheno, geno) 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /man/medoQtc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \name{medoQtc} 4 | \alias{medoQtc} 5 | \title{Find medoids of QTCs} 6 | \usage{ 7 | medoQtc(object, geno, alpha = 0.05, min.absCor = 0.05) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{\link{qtcatHit}}.} 11 | 12 | \item{geno}{an object of class \code{\link{qtcatGeno}}.} 13 | 14 | \item{alpha}{an alpha level for significance estimation.} 15 | 16 | \item{min.absCor}{minimum absolute value of correlation considered.} 17 | } 18 | \description{ 19 | Find a medoid of for each quantitative trait cluster (QTC). 20 | } 21 | \examples{ 22 | # If you want to run the examples, use: 23 | # example(medoQtc, run.dontrun = TRUE) 24 | \dontrun{ 25 | # files containing example data for SNP data and the phenotype 26 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 27 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 28 | pdat <- read.csv(pfile, header = TRUE) 29 | snp <- read.snpData(gfile, sep = ",") 30 | clust <- qtcatClust(snp) 31 | geno <- qtcatGeno(snp, clust) 32 | pheno <- qtcatPheno(names = pdat[, 1], 33 | pheno = pdat[, 2], 34 | covariates = model.matrix(~ pdat[, 3])) 35 | fitted <- qtcatHit(pheno, geno) 36 | 37 | # QTC medoids 38 | medo <- medoQtc(fitted, geno) 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/naFreq-snpMatrix-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpData.R 3 | \docType{methods} 4 | \name{naFreq,snpMatrix-method} 5 | \alias{naFreq,snpMatrix-method} 6 | \title{NA frequency} 7 | \usage{ 8 | \S4method{naFreq}{snpMatrix}(x, dim = 1) 9 | } 10 | \arguments{ 11 | \item{x}{an object of class \linkS4class{snpMatrix}.} 12 | 13 | \item{dim}{a integer for dimension. 1 (default) is for rows (individuals), 2 is for 14 | columns (SNPs).} 15 | } 16 | \description{ 17 | NA frequency in an object of class \linkS4class{snpMatrix}. 18 | } 19 | \examples{ 20 | # file containing example data for SNP data 21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 22 | snp <- read.snpData(gfile, sep = ",") 23 | na1 <- naFreq(snp, 1) 24 | na2 <- naFreq(snp, 2) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /man/naFreq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGeneric.R 3 | \name{naFreq} 4 | \alias{naFreq} 5 | \title{Missing Data Frequency.} 6 | \usage{ 7 | naFreq(x, dim = 1L) 8 | } 9 | \arguments{ 10 | \item{x}{an object, for which a corresponding method exists.} 11 | 12 | \item{dim}{dimension over which heterozygosity is estimated. 1 (default) is for rows 13 | (individuals), 2 is for columns (SNPs).} 14 | } 15 | \description{ 16 | Frequency of missing Data in data set. 17 | } 18 | -------------------------------------------------------------------------------- /man/plotQtc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \name{plotQtc} 4 | \alias{plotQtc} 5 | \title{Plot resulting QTCs of the Hierarchical Inference Test} 6 | \usage{ 7 | plotQtc(x, alpha = 0.05, xlab = "Chromosomes", 8 | ylab = expression(-log[10](italic(p))), col.axis = NULL, ...) 9 | } 10 | \arguments{ 11 | \item{x}{an object of class \code{\link{qtcatHit}}.} 12 | 13 | \item{alpha}{an alpha level for significance estimation.} 14 | 15 | \item{xlab}{a title for the x axis.} 16 | 17 | \item{ylab}{a title for the y axis.} 18 | 19 | \item{col.axis}{colors for axis line, tick marks, and title respectively.} 20 | 21 | \item{...}{other graphical parameters may also be passed as arguments to this function.} 22 | } 23 | \description{ 24 | Plot the QTCs (significant cluster of SNPs) at their 25 | position at the genome. 26 | } 27 | \examples{ 28 | # If you want to run the examples, use: 29 | # example(plotQtc, run.dontrun = TRUE) 30 | \dontrun{ 31 | # files containing example data for SNP data and the phenotype 32 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 33 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 34 | pdat <- read.csv(pfile, header = TRUE) 35 | snp <- read.snpData(gfile, sep = ",") 36 | clust <- qtcatClust(snp) 37 | geno <- qtcatGeno(snp, clust) 38 | pheno <- qtcatPheno(names = pdat[, 1], 39 | pheno = pdat[, 2], 40 | covariates = model.matrix(~ pdat[, 3])) 41 | fitted <- qtcatHit(pheno, geno) 42 | 43 | # Plot the QTCs (loci37, loci260, and loci367 are causal) 44 | plotQtc(fitted) 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /man/plotSelFreq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \name{plotSelFreq} 4 | \alias{plotSelFreq} 5 | \title{Plot markers selection frequencies of the Hierarchical Inference Test} 6 | \usage{ 7 | plotSelFreq(x, xlab = "Chromosomes", ylab = "Sel. freq.", col.axis = NULL, 8 | ...) 9 | } 10 | \arguments{ 11 | \item{x}{an object of class \code{\link{qtcatHit}}.} 12 | 13 | \item{xlab}{a title for the x axis.} 14 | 15 | \item{ylab}{a title for the y axis.} 16 | 17 | \item{col.axis}{colors for axis line, tick marks, and title respectively.} 18 | 19 | \item{...}{other graphical parameters may also be passed as arguments to this function.} 20 | } 21 | \description{ 22 | Plot markers selection frequencies at their 23 | position at the genome. 24 | } 25 | \examples{ 26 | # If you want to run the examples, use: 27 | # example(plotSelFreq, run.dontrun = TRUE) 28 | \dontrun{ 29 | # files containing example data for SNP data and the phenotype 30 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 31 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 32 | pdat <- read.csv(pfile, header = TRUE) 33 | snp <- read.snpData(gfile, sep = ",") 34 | clust <- qtcatClust(snp) 35 | geno <- qtcatGeno(snp, clust) 36 | pheno <- qtcatPheno(names = pdat[, 1], 37 | pheno = pdat[, 2], 38 | covariates = model.matrix(~ pdat[, 3])) 39 | fitted <- qtcatHit(pheno, geno) 40 | 41 | # Plot the selection frequncy of markers (loci37, loci260, and loci367 are causal) 42 | plotSelFreq(fitted) 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /man/qtcat-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/qtcatPackage.R 3 | \docType{package} 4 | \name{qtcat-package} 5 | \alias{qtcat-package} 6 | \alias{qtcat} 7 | \title{Quantitative Trait Cluster Association Test} 8 | \description{ 9 | All SNPs are jointly associated to the phenotype and at the same time 10 | correlation among them is considered. Thus, correction for population structure becomes 11 | unnecessary, which in many cases results in a power advantages compared to other methods. 12 | } 13 | \author{ 14 | Jonas Klasen 15 | } 16 | -------------------------------------------------------------------------------- /man/qtcatClust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpCluster.R 3 | \name{qtcatClust} 4 | \alias{qtcatClust} 5 | \title{Hierarchical clustering for big SNP data sets.} 6 | \usage{ 7 | qtcatClust(snp, k, identicals = TRUE, maxNeigbours = 100, nLocal = 10, 8 | method = "complete", mc.cores = 1, trace = FALSE, ...) 9 | } 10 | \arguments{ 11 | \item{snp}{an object of class \linkS4class{snpMatrix}.} 12 | 13 | \item{k}{a positive integer specifying the number of clusters, less than the number of 14 | observations.} 15 | 16 | \item{identicals}{logical, if zero clustering.} 17 | 18 | \item{maxNeigbours}{a positive integer, specifying the maximum number of randomized 19 | searches.} 20 | 21 | \item{nLocal}{a positive integer, specifying the number of optimisation runs. Columns 22 | have to be similar to \code{snp}.} 23 | 24 | \item{method}{see hclust.} 25 | 26 | \item{mc.cores}{a number of cores for parallel computing. See \code{mclapply} in package 27 | parallel for details.} 28 | 29 | \item{trace}{logical, if \code{TRUE} it prints current status of the program.} 30 | 31 | \item{...}{additional argruments for \code{\link[fastcluster]{hclust}}} 32 | } 33 | \description{ 34 | A three step approximated hierarchical clustering of SNPs suitable to 35 | large data sets. 36 | } 37 | \examples{ 38 | # file containing example data for SNP data 39 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 40 | snp <- read.snpData(gfile, sep = ",") 41 | 42 | clust <- qtcatClust(snp) 43 | 44 | } 45 | \seealso{ 46 | clarans 47 | } 48 | -------------------------------------------------------------------------------- /man/qtcatGeno.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \name{qtcatGeno} 4 | \alias{qtcatGeno} 5 | \title{A genotype object constructor} 6 | \usage{ 7 | qtcatGeno(snp, snpClust, absCor, min.absCor = 0.5, mc.cores = 1) 8 | } 9 | \arguments{ 10 | \item{snp}{an object of S4 class \linkS4class{snpMatrix}.} 11 | 12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.} 13 | 14 | \item{absCor}{a vector of absolute value of correlations considered in the hierarchy.} 15 | 16 | \item{min.absCor}{a minimum absolute value of correlation which is considered. A value 17 | in the range from 0 to 1.} 18 | 19 | \item{mc.cores}{a number of cores for parallelising. The maximum is \code{'B'}. For 20 | details see \code{\link[parallel]{mclapply}}.} 21 | } 22 | \description{ 23 | Constructs a S3-object containing a SNP design matrix and a hierarchy. If 24 | a SNP in the input object contains missing data, the clustering is used to impute 25 | information from highly correlated neighbor SNPs. The genotype object is needed for 26 | \code{\link{qtcatHit}} as input. 27 | } 28 | \examples{ 29 | # file containing example data for SNP data 30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 31 | snp <- read.snpData(gfile, sep = ",") 32 | clust <- qtcatClust(snp) 33 | 34 | # Construct geotype object 35 | geno <- qtcatGeno(snp, clust) 36 | 37 | } 38 | -------------------------------------------------------------------------------- /man/qtcatHit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \name{qtcatHit} 4 | \alias{qtcatHit} 5 | \title{Fitting Hierarchical Inference Testing} 6 | \usage{ 7 | qtcatHit(pheno, geno, B = 50, p.samp1 = 0.35, nfolds = 5, 8 | overall.lambda = FALSE, lambda.opt = "lambda.1se", alpha = 1, 9 | gamma = seq(0.05, 0.99, by = 0.01), max.p.esti = 1, seed = 12321, 10 | mc.cores = 1, trace = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{pheno}{an object of class \code{\link{qtcatPheno}}.} 14 | 15 | \item{geno}{an object of class \code{\link{qtcatGeno}}.} 16 | 17 | \item{B}{a integer indicating the number of sample-splits.} 18 | 19 | \item{p.samp1}{a value specifying the fraction of data used for the LASSO sample-split. 20 | The ANOVA sample-split is \code{1 - p.samp1}.} 21 | 22 | \item{nfolds}{Number of folds (default is 5). See \code{\link[glmnet]{cv.glmnet}} for 23 | more details.} 24 | 25 | \item{overall.lambda}{Logical, if true, lambda is estimated once, if false (default), 26 | lambda is estimated for each sample split.} 27 | 28 | \item{lambda.opt}{a criterion for optimum selection of cross validated lasso. Either 29 | "lambda.1se" (default) or "lambda.min". See 30 | \code{\link[glmnet]{cv.glmnet}} for more details.} 31 | 32 | \item{alpha}{a single value in the range of 0 to 1 for the elastic net mixing parameter.} 33 | 34 | \item{gamma}{a vector of gamma-values used in significance estimation.} 35 | 36 | \item{max.p.esti}{a maximum for computed p-values. All p-values above this value are set 37 | to one. Small \code{max.p.esti} values reduce computing time.} 38 | 39 | \item{seed}{a RNG seed, see \code{\link{set.seed}}.} 40 | 41 | \item{mc.cores}{a number of cores for parallelising. The maximum is 42 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.} 43 | 44 | \item{trace}{logical, if \code{TRUE} it prints the current status of the program.} 45 | 46 | \item{...}{additional arguments for \code{\link[glmnet]{cv.glmnet}}.} 47 | } 48 | \description{ 49 | Hierarchical inference testing for phenotype-SNP association. 50 | } 51 | \examples{ 52 | # If you want to run the examples, use: 53 | # example(qtcatHit, run.dontrun = TRUE) 54 | \dontrun{ 55 | # files containing example data for SNP data and the phenotype 56 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 57 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 58 | pdat <- read.csv(pfile, header = TRUE) 59 | snp <- read.snpData(gfile, sep = ",") 60 | clust <- qtcatClust(snp) 61 | geno <- qtcatGeno(snp, clust) 62 | pheno <- qtcatPheno(names = pdat[, 1], 63 | pheno = pdat[, 2], 64 | covariates = model.matrix(~ pdat[, 3])) 65 | 66 | # fitting HIT 67 | fitted <- qtcatHit(pheno, geno) 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /man/qtcatPheno.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \name{qtcatPheno} 4 | \alias{qtcatPheno} 5 | \title{A phenotype object constructor} 6 | \usage{ 7 | qtcatPheno(names, pheno, family = "gaussian", covariates = NULL) 8 | } 9 | \arguments{ 10 | \item{names}{a vector of individual names of length 'n'.} 11 | 12 | \item{pheno}{a vector of length 'n' or a matrix size 'n x 2' in case of binomial family. 13 | This contains the phenotypic observations.} 14 | 15 | \item{family}{a character string specifying the family of the phenotype distribution. 16 | Either "gaussian" (default) or "binomial".} 17 | 18 | \item{covariates}{a matrix typically generated by a call of 19 | \code{\link[stats]{model.matrix}}. It contain additional variables influencing the 20 | phenotype e.g. environmental and experimental covariates.} 21 | } 22 | \description{ 23 | Constructs an S3-object containing phenotype and if additional covariats 24 | exist a design matrix of those. The phenotype object is needed as input for 25 | \code{\link{qtcatHit}}. 26 | } 27 | \examples{ 28 | # file containing example data for a phenotype. 29 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 30 | pdat <- read.csv(pfile, header = TRUE) 31 | 32 | # Construct phenotype object 33 | pheno <- qtcatPheno(names = pdat[, 1], 34 | pheno = pdat[, 2], 35 | covariates = model.matrix(~ pdat[, 3])) 36 | 37 | } 38 | -------------------------------------------------------------------------------- /man/qtcatQtc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \name{qtcatQtc} 4 | \alias{qtcatQtc} 5 | \title{Summarize results of Hierarchical Inference Test} 6 | \usage{ 7 | qtcatQtc(object, alpha = 0.05, min.absCor = 0.05) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{\link{qtcatHit}}.} 11 | 12 | \item{alpha}{an alpha level for significance estimation.} 13 | 14 | \item{min.absCor}{a minimum absolute value of correlation to be considered.} 15 | } 16 | \description{ 17 | Summarizing the QTCs (significant cluster of SNPs) and their position at 18 | the genome. 19 | } 20 | \examples{ 21 | # If you want to run the examples, use: 22 | # example(qtcatQtc, run.dontrun = TRUE) 23 | \dontrun{ 24 | # files containing example data for SNP data and the phenotype 25 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat") 26 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 27 | pdat <- read.csv(pfile, header = TRUE) 28 | snp <- read.snpData(gfile, sep = ",") 29 | clust <- qtcatClust(snp) 30 | geno <- qtcatGeno(snp, clust) 31 | pheno <- qtcatPheno(names = pdat[, 1], 32 | pheno = pdat[, 2], 33 | covariates = model.matrix(~ pdat[, 3])) 34 | fitted <- qtcatHit(pheno, geno) 35 | 36 | # Summarizing the QTCs (loci37, loci260, and loci367 are causal) 37 | qtcatQtc(fitted) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/read.snpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpData.R 3 | \name{read.snpData} 4 | \alias{read.snpData} 5 | \title{Read SNP tables as a snpMatrix object} 6 | \usage{ 7 | read.snpData(file, sep = " ", quote = "\\"", na.string = "NA", 8 | nrows = -1L) 9 | } 10 | \arguments{ 11 | \item{file}{the name of the file which the data are to be read from. If it does not 12 | contain an absolute path, the file name is relative to the current working directory, 13 | \code{getwd()}. Tilde-expansion is performed where supported.} 14 | 15 | \item{sep}{a field separator character. Values on each line of the file are separated 16 | by this character.} 17 | 18 | \item{quote}{the set of quoting characters. To disable quoting altogether, use 19 | \code{quote = ""}.} 20 | 21 | \item{na.string}{a string which is interpreted as NA value.} 22 | 23 | \item{nrows}{a integer, the maximum number of rows to read.} 24 | } 25 | \description{ 26 | Reads a file in table format and as a \linkS4class{snpMatrix} object. 27 | } 28 | \examples{ 29 | # file containing example data for SNP data 30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 31 | snp <- read.snpData(gfile, sep = ",") 32 | 33 | } 34 | -------------------------------------------------------------------------------- /man/rename.leafs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpCluster.R 3 | \name{rename.leafs} 4 | \alias{rename.leafs} 5 | \title{Rename dendrogram leafs} 6 | \usage{ 7 | rename.leafs(dend, labels) 8 | } 9 | \arguments{ 10 | \item{dend}{a dendrogram.} 11 | 12 | \item{labels}{a vector of new names.} 13 | } 14 | \description{ 15 | Rename dendrogram leafs. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/snpInfo-qtcatHit-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/associationTest.R 3 | \docType{methods} 4 | \name{snpInfo,qtcatHit-method} 5 | \alias{snpInfo,qtcatHit-method} 6 | \title{Get position from qtcatHit object} 7 | \usage{ 8 | \S4method{snpInfo}{qtcatHit}(object) 9 | } 10 | \arguments{ 11 | \item{object}{an object of class \code{\link{qtcatHit}}.} 12 | } 13 | \description{ 14 | Genetic position info from an object of class qtcatHit. 15 | } 16 | -------------------------------------------------------------------------------- /man/snpInfo-snpMatrix-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpData.R 3 | \docType{methods} 4 | \name{snpInfo,snpMatrix-method} 5 | \alias{snpInfo,snpMatrix-method} 6 | \title{Get position from snpMatrix} 7 | \usage{ 8 | \S4method{snpInfo}{snpMatrix}(object) 9 | } 10 | \arguments{ 11 | \item{object}{an object of class \linkS4class{snpMatrix}.} 12 | } 13 | \description{ 14 | Genetic position info from an object of class 15 | \linkS4class{snpMatrix}. 16 | } 17 | \examples{ 18 | # file containing example data for SNP data 19 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat") 20 | snp <- read.snpData(gfile, sep = ",") 21 | info <- snpInfo(snp) 22 | 23 | } 24 | -------------------------------------------------------------------------------- /man/snpInfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllGeneric.R 3 | \name{snpInfo} 4 | \alias{snpInfo} 5 | \title{Extract genomic position and allele information.} 6 | \usage{ 7 | snpInfo(object) 8 | } 9 | \arguments{ 10 | \item{object}{an object, for which a corresponding method exists.} 11 | } 12 | \description{ 13 | Extract genomic position and allele information from object. 14 | } 15 | -------------------------------------------------------------------------------- /man/snpMatrix-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllClass.R 3 | \docType{class} 4 | \name{snpMatrix-class} 5 | \alias{snpMatrix-class} 6 | \title{A S4 class to represent a SNP-matrix} 7 | \description{ 8 | A S4 class to represent a SNP matrix. Storing SNP information, by using a 9 | byte-level (raw) storage scheme, jointly with genomic position and allele information. 10 | } 11 | \section{Slots}{ 12 | 13 | \describe{ 14 | \item{\code{snpData}}{a matrix of SNPs stored in type 'raw'. 00 is NA, 01 homozygote AA, 02 15 | heterozygote AB, and 03 homozygote BB.} 16 | 17 | \item{\code{snpInfo}}{data.frame with four columns. The first col. contains the chromosomes, 18 | the second col. the positions, the third col. the first allele and the fourth the second 19 | allele.} 20 | 21 | \item{\code{dim}}{an integer vector with exactly two non-negative values.} 22 | 23 | \item{\code{dimnames}}{a list of length two; each component containing NULL or a character vector 24 | length equal the corresponding dim element.} 25 | }} 26 | 27 | -------------------------------------------------------------------------------- /man/sub-snpMatrix-ANY-ANY-missing-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/snpData.R 3 | \docType{methods} 4 | \name{[,snpMatrix,ANY,ANY,missing-method} 5 | \alias{[,snpMatrix,ANY,ANY,missing-method} 6 | \title{Subsetting snpMatrix} 7 | \usage{ 8 | \S4method{[}{snpMatrix,ANY,ANY,missing}(x, i, j, ..., drop = TRUE) 9 | } 10 | \arguments{ 11 | \item{x}{an object of class \linkS4class{snpMatrix}.} 12 | 13 | \item{i}{a indices specifying elements to extract or replace. Indices are booleans, 14 | numeric or character vectors.} 15 | 16 | \item{j}{indices specifying elements to extract or replace. Indices are booleans, 17 | numeric or character vectors.} 18 | 19 | \item{...}{not implemented.} 20 | 21 | \item{drop}{not implemented.} 22 | } 23 | \description{ 24 | Subsetting an object of class \linkS4class{snpMatrix}. 25 | } 26 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // read_snpData 9 | Rcpp::List read_snpData(Rcpp::CharacterVector file, char sep, char quote, bool rowNames, Rcpp::CharacterVector na_str, int nrows); 10 | RcppExport SEXP _qtcat_read_snpData(SEXP fileSEXP, SEXP sepSEXP, SEXP quoteSEXP, SEXP rowNamesSEXP, SEXP na_strSEXP, SEXP nrowsSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type file(fileSEXP); 15 | Rcpp::traits::input_parameter< char >::type sep(sepSEXP); 16 | Rcpp::traits::input_parameter< char >::type quote(quoteSEXP); 17 | Rcpp::traits::input_parameter< bool >::type rowNames(rowNamesSEXP); 18 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type na_str(na_strSEXP); 19 | Rcpp::traits::input_parameter< int >::type nrows(nrowsSEXP); 20 | rcpp_result_gen = Rcpp::wrap(read_snpData(file, sep, quote, rowNames, na_str, nrows)); 21 | return rcpp_result_gen; 22 | END_RCPP 23 | } 24 | // corDist 25 | double corDist(RawVector x, RawVector y); 26 | RcppExport SEXP _qtcat_corDist(SEXP xSEXP, SEXP ySEXP) { 27 | BEGIN_RCPP 28 | Rcpp::RObject rcpp_result_gen; 29 | Rcpp::RNGScope rcpp_rngScope_gen; 30 | Rcpp::traits::input_parameter< RawVector >::type x(xSEXP); 31 | Rcpp::traits::input_parameter< RawVector >::type y(ySEXP); 32 | rcpp_result_gen = Rcpp::wrap(corDist(x, y)); 33 | return rcpp_result_gen; 34 | END_RCPP 35 | } 36 | // corDists 37 | NumericVector corDists(RawMatrix x); 38 | RcppExport SEXP _qtcat_corDists(SEXP xSEXP) { 39 | BEGIN_RCPP 40 | Rcpp::RObject rcpp_result_gen; 41 | Rcpp::RNGScope rcpp_rngScope_gen; 42 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 43 | rcpp_result_gen = Rcpp::wrap(corDists(x)); 44 | return rcpp_result_gen; 45 | END_RCPP 46 | } 47 | // corPreIdenticals 48 | List corPreIdenticals(RawMatrix x, const int step); 49 | RcppExport SEXP _qtcat_corPreIdenticals(SEXP xSEXP, SEXP stepSEXP) { 50 | BEGIN_RCPP 51 | Rcpp::RObject rcpp_result_gen; 52 | Rcpp::RNGScope rcpp_rngScope_gen; 53 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 54 | Rcpp::traits::input_parameter< const int >::type step(stepSEXP); 55 | rcpp_result_gen = Rcpp::wrap(corPreIdenticals(x, step)); 56 | return rcpp_result_gen; 57 | END_RCPP 58 | } 59 | // corIdenticals 60 | List corIdenticals(RawMatrix x, IntegerVector clustIdx); 61 | RcppExport SEXP _qtcat_corIdenticals(SEXP xSEXP, SEXP clustIdxSEXP) { 62 | BEGIN_RCPP 63 | Rcpp::RObject rcpp_result_gen; 64 | Rcpp::RNGScope rcpp_rngScope_gen; 65 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 66 | Rcpp::traits::input_parameter< IntegerVector >::type clustIdx(clustIdxSEXP); 67 | rcpp_result_gen = Rcpp::wrap(corIdenticals(x, clustIdx)); 68 | return rcpp_result_gen; 69 | END_RCPP 70 | } 71 | // joinCorIdenticals 72 | List joinCorIdenticals(int n, List preclust, List ClustMedo); 73 | RcppExport SEXP _qtcat_joinCorIdenticals(SEXP nSEXP, SEXP preclustSEXP, SEXP ClustMedoSEXP) { 74 | BEGIN_RCPP 75 | Rcpp::RObject rcpp_result_gen; 76 | Rcpp::RNGScope rcpp_rngScope_gen; 77 | Rcpp::traits::input_parameter< int >::type n(nSEXP); 78 | Rcpp::traits::input_parameter< List >::type preclust(preclustSEXP); 79 | Rcpp::traits::input_parameter< List >::type ClustMedo(ClustMedoSEXP); 80 | rcpp_result_gen = Rcpp::wrap(joinCorIdenticals(n, preclust, ClustMedo)); 81 | return rcpp_result_gen; 82 | END_RCPP 83 | } 84 | // corClarans 85 | List corClarans(RawMatrix x, const int k, const int maxNeigbours); 86 | RcppExport SEXP _qtcat_corClarans(SEXP xSEXP, SEXP kSEXP, SEXP maxNeigboursSEXP) { 87 | BEGIN_RCPP 88 | Rcpp::RObject rcpp_result_gen; 89 | Rcpp::RNGScope rcpp_rngScope_gen; 90 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 91 | Rcpp::traits::input_parameter< const int >::type k(kSEXP); 92 | Rcpp::traits::input_parameter< const int >::type maxNeigbours(maxNeigboursSEXP); 93 | rcpp_result_gen = Rcpp::wrap(corClarans(x, k, maxNeigbours)); 94 | return rcpp_result_gen; 95 | END_RCPP 96 | } 97 | // corMedoids 98 | IntegerVector corMedoids(RawMatrix x, IntegerVector clusters); 99 | RcppExport SEXP _qtcat_corMedoids(SEXP xSEXP, SEXP clustersSEXP) { 100 | BEGIN_RCPP 101 | Rcpp::RObject rcpp_result_gen; 102 | Rcpp::RNGScope rcpp_rngScope_gen; 103 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 104 | Rcpp::traits::input_parameter< IntegerVector >::type clusters(clustersSEXP); 105 | rcpp_result_gen = Rcpp::wrap(corMedoids(x, clusters)); 106 | return rcpp_result_gen; 107 | END_RCPP 108 | } 109 | // design 110 | NumericMatrix design(RawMatrix x); 111 | RcppExport SEXP _qtcat_design(SEXP xSEXP) { 112 | BEGIN_RCPP 113 | Rcpp::RObject rcpp_result_gen; 114 | Rcpp::RNGScope rcpp_rngScope_gen; 115 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 116 | rcpp_result_gen = Rcpp::wrap(design(x)); 117 | return rcpp_result_gen; 118 | END_RCPP 119 | } 120 | // afreq 121 | NumericVector afreq(RawMatrix x, bool maf); 122 | RcppExport SEXP _qtcat_afreq(SEXP xSEXP, SEXP mafSEXP) { 123 | BEGIN_RCPP 124 | Rcpp::RObject rcpp_result_gen; 125 | Rcpp::RNGScope rcpp_rngScope_gen; 126 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 127 | Rcpp::traits::input_parameter< bool >::type maf(mafSEXP); 128 | rcpp_result_gen = Rcpp::wrap(afreq(x, maf)); 129 | return rcpp_result_gen; 130 | END_RCPP 131 | } 132 | // hetfreq 133 | NumericVector hetfreq(RawMatrix x, int dim); 134 | RcppExport SEXP _qtcat_hetfreq(SEXP xSEXP, SEXP dimSEXP) { 135 | BEGIN_RCPP 136 | Rcpp::RObject rcpp_result_gen; 137 | Rcpp::RNGScope rcpp_rngScope_gen; 138 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 139 | Rcpp::traits::input_parameter< int >::type dim(dimSEXP); 140 | rcpp_result_gen = Rcpp::wrap(hetfreq(x, dim)); 141 | return rcpp_result_gen; 142 | END_RCPP 143 | } 144 | // nafreq 145 | NumericVector nafreq(RawMatrix x, int dim); 146 | RcppExport SEXP _qtcat_nafreq(SEXP xSEXP, SEXP dimSEXP) { 147 | BEGIN_RCPP 148 | Rcpp::RObject rcpp_result_gen; 149 | Rcpp::RNGScope rcpp_rngScope_gen; 150 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP); 151 | Rcpp::traits::input_parameter< int >::type dim(dimSEXP); 152 | rcpp_result_gen = Rcpp::wrap(nafreq(x, dim)); 153 | return rcpp_result_gen; 154 | END_RCPP 155 | } 156 | 157 | static const R_CallMethodDef CallEntries[] = { 158 | {"_qtcat_read_snpData", (DL_FUNC) &_qtcat_read_snpData, 6}, 159 | {"_qtcat_corDist", (DL_FUNC) &_qtcat_corDist, 2}, 160 | {"_qtcat_corDists", (DL_FUNC) &_qtcat_corDists, 1}, 161 | {"_qtcat_corPreIdenticals", (DL_FUNC) &_qtcat_corPreIdenticals, 2}, 162 | {"_qtcat_corIdenticals", (DL_FUNC) &_qtcat_corIdenticals, 2}, 163 | {"_qtcat_joinCorIdenticals", (DL_FUNC) &_qtcat_joinCorIdenticals, 3}, 164 | {"_qtcat_corClarans", (DL_FUNC) &_qtcat_corClarans, 3}, 165 | {"_qtcat_corMedoids", (DL_FUNC) &_qtcat_corMedoids, 2}, 166 | {"_qtcat_design", (DL_FUNC) &_qtcat_design, 1}, 167 | {"_qtcat_afreq", (DL_FUNC) &_qtcat_afreq, 2}, 168 | {"_qtcat_hetfreq", (DL_FUNC) &_qtcat_hetfreq, 2}, 169 | {"_qtcat_nafreq", (DL_FUNC) &_qtcat_nafreq, 2}, 170 | {NULL, NULL, 0} 171 | }; 172 | 173 | RcppExport void R_init_qtcat(DllInfo *dll) { 174 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 175 | R_useDynamicSymbols(dll, FALSE); 176 | } 177 | -------------------------------------------------------------------------------- /src/ReadData.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | 8 | void split(const string &s, char delim, vector &elems); 9 | 10 | // [[Rcpp::export]] 11 | Rcpp::List read_snpData(Rcpp::CharacterVector file, char sep, char quote, 12 | bool rowNames, Rcpp::CharacterVector na_str, 13 | int nrows) { 14 | string oneLine; 15 | vector lineElements; 16 | const string fname = Rcpp::as(file); 17 | const string naStr = Rcpp::as(na_str); 18 | // Input file stream 19 | ifstream fileIn(fname.c_str()); 20 | // Column names (Individual names) 21 | getline(fileIn, oneLine); 22 | // Delete qoutes if exist 23 | if (quote != ' ') { 24 | oneLine.erase(remove(oneLine.begin(), oneLine.end(), quote), oneLine.end()); 25 | } 26 | // check if sep is part of first line 27 | if (oneLine.find(sep) == string::npos) { 28 | Rcpp::stop("In the first line the separator character 'sep' doesn't exist"); 29 | } 30 | // split string in to vector of strings 31 | split(oneLine, sep, lineElements); 32 | unsigned int row = lineElements.size(); 33 | // Individual names 34 | Rcpp::CharacterVector indivNames = Rcpp::wrap(lineElements); 35 | // 36 | int nosnpcount = 0; 37 | vector lociNames; 38 | vector chr; 39 | vector pos; 40 | set allelesSet; 41 | vector allelepos; 42 | string AA; 43 | string AB; 44 | string BA; 45 | string BB; 46 | vector snpData; 47 | int col = 0; 48 | int posStart = 0; 49 | int dataStart = 2; 50 | if (rowNames) { 51 | dataStart ++; 52 | posStart ++; 53 | } 54 | int line = 0; 55 | while(getline(fileIn, oneLine)) { 56 | line ++; 57 | lineElements.clear(); 58 | allelesSet.clear(); 59 | // Delete qoutes if exist 60 | if (quote != ' ') { 61 | oneLine.erase(remove(oneLine.begin(), oneLine.end(), quote), oneLine.end()); 62 | } 63 | // split string in to vector of strings 64 | split(oneLine, sep, lineElements); 65 | // length of line 66 | if (row != lineElements.size()){ 67 | Rcpp::stop("Error: Length of line ", line, " is ", 68 | lineElements.size(), " instead of ", row); 69 | } 70 | // detect Nucleotides in this line (SNP) 71 | for (unsigned int i = dataStart; i < lineElements.size(); ++ i) { 72 | if (lineElements[i] != naStr) { 73 | allelesSet.insert(lineElements[i].begin(), lineElements[i].end()); 74 | } 75 | } 76 | vector allele(allelesSet.begin(), allelesSet.end()); 77 | // if more not two alleles skip 78 | if (allele.size() != 2) { 79 | nosnpcount ++; 80 | continue; 81 | } 82 | allelepos.push_back(allele[0]); 83 | allelepos.push_back(allele[1]); 84 | // if two alleles make new coding 85 | AA = allele[0]; AA += allele[0]; 86 | AB = allele[0]; AB += allele[1]; 87 | BA = allele[1]; BA += allele[0]; 88 | BB = allele[1]; BB += allele[1]; 89 | // raw coding of line (SNP) 90 | for (unsigned int i = dataStart; i < lineElements.size(); i ++) { 91 | if (lineElements[i] == AA) { 92 | snpData.push_back(0x01); 93 | } else if (lineElements[i] == BB) { 94 | snpData.push_back(0x03); 95 | } else if ((lineElements[i] == AB) | 96 | (lineElements[i] == BA)) { 97 | snpData.push_back(0x02); 98 | } else { 99 | snpData.push_back(0x00); 100 | } 101 | } 102 | // SNP names 103 | if (rowNames) { 104 | lociNames.push_back(lineElements[0]); 105 | } 106 | // genetic positions 107 | chr.push_back(lineElements[posStart]); 108 | pos.push_back(atoi(lineElements[posStart + 1].c_str())); 109 | col ++; 110 | if ((nrows > 0) && (nrows <= line)) { 111 | break; 112 | } 113 | } 114 | if (snpData.size() == 0) 115 | Rcpp::stop("No valid SNP found"); 116 | if (nosnpcount > 0) 117 | Rcpp::Rcerr << "note: " << nosnpcount << " of " << line << 118 | " lines have more or less than two alleles and are therefore not considered" 119 | << endl; 120 | // Rcpp conversioan and vector as matrix 121 | // alleles 122 | Rcpp::CharacterVector alleles = Rcpp::wrap(allelepos); 123 | alleles.attr("dim") = Rcpp::Dimension(2, col); 124 | // snpData 125 | Rcpp::RawVector snpOutData(snpData.size()); 126 | copy(snpData.begin(), snpData.end(), snpOutData.begin()); 127 | row = snpOutData.size() / col; 128 | snpOutData.attr("dim") = Rcpp::Dimension(row, col); 129 | // individual names 130 | indivNames.erase(indivNames.begin(), indivNames.begin() + dataStart); 131 | // Results as List 132 | Rcpp::List out = Rcpp::List::create(Rcpp::Named("snpData", snpOutData), 133 | Rcpp::Named("alleles", alleles), 134 | Rcpp::Named("chr", chr), 135 | Rcpp::Named("pos", pos), 136 | Rcpp::Named("lociNames", lociNames), 137 | Rcpp::Named("indivNames", indivNames)); 138 | return out; 139 | } 140 | 141 | // split string in to vector of strings 142 | void split(const string &s, char delim, vector &elems) { 143 | stringstream ss(s); 144 | string item; 145 | while (getline(ss, item, delim)) { 146 | elems.push_back(item); 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/snpCluster.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace Rcpp; 4 | 5 | // correlation 6 | double cor(RawVector x, RawVector y) { 7 | int n = x.size(); 8 | int n_na = 0; 9 | double ex = 0, ey = 0, xt = 0, yt = 0, sxx = 0, syy = 0, sxy = 0; 10 | // setup variables and find the mean 11 | for (int i = 0; i < n; i ++) { 12 | if ((x[i] == 0x00) | (y[i] == 0x00)) { 13 | n_na ++; 14 | continue; 15 | } 16 | ex += x[i]; 17 | ey += y[i]; 18 | } 19 | if ((n - n_na) < 1) 20 | stop("estimation of correlation not possible, no complete element pairs"); 21 | ex /= (n - n_na); 22 | ey /= (n - n_na); 23 | // correlation coefficent 24 | for (int i = 0; i < n; i ++) { 25 | if ((x[i] == 0x00) | (y[i] == 0x00)) 26 | continue; 27 | xt = x[i] - ex; 28 | yt = y[i] - ey; 29 | sxx += xt * xt; 30 | syy += yt * yt; 31 | sxy += xt * yt; 32 | } 33 | double cor = sxy / (sqrt(sxx * syy) + 1e-16); 34 | return cor; 35 | } // cor 36 | 37 | // 1 x 1 correlation distance 38 | // [[Rcpp::export]] 39 | double corDist(RawVector x, RawVector y) { 40 | double dcor = 1 - std::fabs(cor(x, y)); 41 | return dcor; 42 | } 43 | 44 | // distance from all marker 45 | // [[Rcpp::export]] 46 | NumericVector corDists(RawMatrix x) { 47 | int n = x.ncol(); 48 | NumericVector dist(n * (n -1) / 2); 49 | uint64_t count = 0; 50 | for (int i = 0; i < n - 1; i ++) { 51 | for (int j = i + 1; j < n; j ++) { 52 | dist[count] = corDist(x(_, i), x(_, j)); 53 | count ++; 54 | } 55 | } 56 | return dist; 57 | } // corDists 58 | 59 | //pre cluster for identical search 60 | // [[Rcpp::export]] 61 | List corPreIdenticals(RawMatrix x, const int step) { 62 | int n = x.ncol(); 63 | IntegerVector clusters(n); 64 | std::map< int, std::vector > preclust; 65 | std::vector premedoInx; 66 | int k = 0; 67 | if (n >= step * 2) { 68 | // medoids equally distributed over the search space 69 | bool smallDist = false; 70 | int startStep = (int)step * .5; 71 | double medodist = 0; 72 | premedoInx.push_back(startStep); 73 | for (int i = startStep + step; i < n; i += step) { 74 | for(int j = startStep; j < i; j += step) { 75 | medodist = corDist(x(_, i), x(_, j)); 76 | if (medodist < .05) { 77 | smallDist = true; 78 | break; 79 | } 80 | } 81 | if (smallDist) { 82 | smallDist = false; 83 | continue; 84 | } 85 | premedoInx.push_back(i); 86 | } 87 | k = premedoInx.size(); 88 | } 89 | int j0; 90 | if (k > 1) { 91 | // cluster all variables to the nearest medoid 92 | NumericVector predist(k); 93 | for (int i = 0; i < n; i ++) { 94 | for (int j = 0; j < k; j ++) { 95 | j0 = premedoInx[j]; 96 | predist[j] = corDist(x(_, i), x(_, j0)); 97 | } 98 | preclust[which_min(predist)].push_back(i); 99 | } 100 | } else { 101 | for (int i = 0; i < n; i ++) { 102 | preclust[0].push_back(i); 103 | } 104 | } 105 | return List::create(preclust); 106 | } // preClustIdenticals 107 | 108 | // find identicals 109 | // [[Rcpp::export]] 110 | List corIdenticals(RawMatrix x, IntegerVector clustIdx) { 111 | int nCluster = clustIdx.size(); 112 | double dist = 0; 113 | IntegerVector clusters(nCluster); 114 | int clustCount = 1; 115 | std::vector grp, medoInx; 116 | int gMedo, i0, j0; 117 | for (int i = 0; i < nCluster; i ++) { 118 | i0 = clustIdx[i]; 119 | if (clusters[i] == 0) { 120 | clusters[i] = clustCount; 121 | grp.push_back(i); 122 | for (int j = i + 1; j < nCluster; j ++) { 123 | j0 = clustIdx[j]; 124 | if (clusters[j] == 0) { 125 | dist = corDist(x(_, i0), x(_, j0)); 126 | if (dist <= 1e-7) { 127 | clusters[j] = clustCount; 128 | grp.push_back(j); 129 | } 130 | } 131 | } 132 | gMedo = grp.size() / 2; 133 | medoInx.push_back(grp[gMedo]); 134 | clustCount ++; 135 | grp.clear(); 136 | } 137 | } 138 | return List::create(clusters, medoInx); 139 | } // identicals 140 | 141 | // join data to one object 142 | // [[Rcpp::export]] 143 | List joinCorIdenticals(int n, List preclust, List ClustMedo) { 144 | IntegerVector clusters(n); 145 | std::vector medoInx; 146 | List SubClustMedo; 147 | IntegerVector clustIdx, clust, medo; 148 | int count = 0, j0 = 0; 149 | for (int i = 0; i < preclust.size(); i ++) { 150 | clustIdx = preclust[i]; 151 | SubClustMedo = ClustMedo[i]; 152 | clust = SubClustMedo[0]; 153 | medo = SubClustMedo[1]; 154 | for (int j = 0; j < clustIdx.size(); j ++) { 155 | j0 = clustIdx[j]; 156 | clusters[j0] = clust[j] + count; 157 | } 158 | for (int j = 0; j < medo.size(); j ++) { 159 | j0 = medo[j]; 160 | medoInx.push_back(clustIdx[j0]); 161 | } 162 | count = medoInx.size(); 163 | } 164 | return List::create(clusters, medoInx); 165 | } // joinIdenticals 166 | 167 | 168 | // clustering of IDB by correlation dictamce with CLARANS 169 | // [[Rcpp::export]] 170 | List corClarans(RawMatrix x, const int k, const int maxNeigbours) { 171 | RNGScope scope; 172 | Environment base("package:base"); 173 | Function sample_int = base["sample.int"]; 174 | int n = x.ncol(); 175 | NumericMatrix medoDist(n, k); 176 | IntegerVector medoInx(k); 177 | double dist = 0; 178 | IntegerVector clusters(n); 179 | // starting medoid and clusters 180 | NumericVector pdist(n, 1.0); 181 | int m = 0; 182 | for (int c = 0; c < k; c ++) { 183 | // starting medoid 184 | m = as(sample_int(n, 1, false, pdist))-1; 185 | medoInx[c] = m; 186 | for (int i= 0; i < n; i ++) { 187 | dist = corDist(x(_, m), x(_, i)); 188 | medoDist(i, c) = dist; 189 | } 190 | } 191 | int clust = 0; 192 | double costs = 0, objective = 0; 193 | for (int i= 0; i < n; i ++) { 194 | // starting cluster membership 195 | clust = which_min(medoDist(i, _)); 196 | clusters[i] = clust; 197 | costs += medoDist(i, clust); 198 | } 199 | objective = costs / n; 200 | IntegerVector ranVar(maxNeigbours); 201 | int i = 0, o = 0; 202 | double minDist = 0; 203 | NumericVector objectDist(n); 204 | NumericVector objectMinDist(n); 205 | double object_cost = 0; 206 | // iteration of inner clarans loop 207 | int iter = 0; 208 | while (iter < maxNeigbours) { 209 | if ( iter == 0) { 210 | ranVar = floor(runif(maxNeigbours, 0, n - 1e-15)); 211 | } 212 | // replacing medoids with objects if costs are smaller 213 | i = ranVar[iter]; 214 | o = clusters[i]; 215 | for (int j = 0; j < n; j ++) { 216 | dist = corDist(x(_, i), x(_, j)); 217 | objectDist[j] = dist; 218 | minDist = dist; 219 | for (int c = 0; c < k; c ++) { 220 | if (c != o) { 221 | minDist = std::min(minDist, medoDist(j, c)); 222 | } 223 | } 224 | objectMinDist[j] = minDist; 225 | } 226 | object_cost = sum(objectMinDist); 227 | if (object_cost < costs) { 228 | // new medoids 229 | medoDist(_, o) = objectDist; 230 | medoInx[o] = i; 231 | // new costs 232 | costs = object_cost; 233 | objective = costs / n; 234 | // new membership to cluster 235 | for (int j = 0; j < n; j ++) { 236 | clusters[j] = which_min(medoDist(j, _)); 237 | } 238 | // restarting loop 239 | iter = 0; 240 | } else { 241 | iter ++; 242 | } 243 | } 244 | // Result as list 245 | return List::create(clusters + 1, medoInx, objective); 246 | } // clarans 247 | 248 | 249 | // medoids of SNP clusters 250 | // [[Rcpp::export]] 251 | IntegerVector corMedoids(RawMatrix x, IntegerVector clusters) { 252 | int n = clusters.size(); 253 | // map of vectors with cluster indexes 254 | std::map< int, std::vector > indexMap; 255 | for (int i = 0; i < n; i ++) { 256 | indexMap[clusters[i]].push_back(i); 257 | } 258 | // for each cluster a dist matrix were the col with min dist is the medoid 259 | std::vector clustInx; 260 | int nClust = 0; 261 | double dist = 0; 262 | int dist_j, dist_i; 263 | std::vector medoInx; 264 | for (std::map< int, std::vector >::iterator it = indexMap.begin(); 265 | it != indexMap.end(); it ++) { 266 | clustInx = it->second; 267 | nClust = clustInx.size(); 268 | NumericMatrix dist_mat(nClust, nClust); 269 | NumericVector dist_sums(nClust); 270 | for (int j = 0; j < nClust - 1; j ++) { 271 | dist_j = clustInx[j]; 272 | for (int i = j; i < nClust; i ++) { 273 | if ( i == j) { 274 | dist_mat(j, i) = 0; 275 | } else { 276 | dist_i = clustInx[i]; 277 | dist = corDist(x(_, dist_j), x(_, dist_i)); 278 | dist_mat(j, i) = dist; 279 | dist_mat(i, j) = dist; 280 | } 281 | } 282 | } 283 | for (int i = 0; i < nClust; i ++) { 284 | dist_sums[i] = sum(dist_mat(_, i)); 285 | } 286 | medoInx.push_back(clustInx[which_min(dist_sums)] + 1); 287 | } 288 | return wrap(medoInx); 289 | } 290 | -------------------------------------------------------------------------------- /src/snpData.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace Rcpp; 6 | 7 | 8 | // [[Rcpp::export]] 9 | NumericMatrix design(RawMatrix x) { 10 | size_t n = x.nrow(); 11 | size_t p = x.ncol(); 12 | NumericMatrix out(n, p); 13 | for (size_t i = 0; i < p; i ++) { 14 | for (size_t j = 0; j < n; j ++) { 15 | if (x(j, i) == 0x01) { 16 | out(j, i) = 0; 17 | } else if (x(j, i) == 0x03) { 18 | out(j, i) = 1; 19 | } else if (x(j, i) == 0x02) { 20 | out(j, i) = .5; 21 | } 22 | } 23 | } 24 | return out; 25 | } 26 | 27 | 28 | // [[Rcpp::export]] 29 | NumericVector afreq(RawMatrix x, bool maf) { 30 | size_t p = x.ncol(); 31 | size_t n = x.nrow(); 32 | size_t n_na = 0; 33 | double a = 0; 34 | double af = 0; 35 | NumericVector allfreq(p); 36 | for (size_t i = 0; i < p; i ++) { 37 | for (size_t j = 0; j < n; j ++) { 38 | if (x(j, i) == 0x00) { 39 | n_na ++; 40 | } else if (x(j, i) == 0x01) { 41 | a ++; 42 | } else if (x(j, i) == 0x02) { 43 | a += .5; 44 | } 45 | } 46 | af = a / (n - n_na); 47 | if (maf) { 48 | if (af <= .5) { 49 | allfreq[i] = af; 50 | } else { 51 | allfreq[i] = 1 - af; 52 | } 53 | } else { 54 | allfreq[i] = af; 55 | } 56 | n_na = 0; 57 | a = 0; 58 | } 59 | return allfreq; 60 | } 61 | 62 | 63 | // [[Rcpp::export]] 64 | NumericVector hetfreq(RawMatrix x, int dim) { 65 | size_t k, l; 66 | if (dim == 1) { 67 | k = x.nrow(); 68 | l = x.ncol(); 69 | } else { 70 | k = x.ncol(); 71 | l = x.nrow(); 72 | } 73 | size_t l_na = 0; 74 | double het = 0; 75 | NumericVector hetf(k); 76 | for (size_t i = 0; i < k; i ++) { 77 | for (size_t j = 0; j < l; j ++) { 78 | if (dim == 1) { 79 | if (x(i, j) == 0x00) { 80 | l_na ++; 81 | } else if (x(i, j) == 0x02) { 82 | het ++; 83 | } 84 | } else { 85 | if (x(j, i) == 0x00) { 86 | l_na ++; 87 | } else if (x(j, i) == 0x02) { 88 | het ++; 89 | } 90 | } 91 | } 92 | hetf[i] = het / (l - l_na); 93 | het = 0; 94 | l_na = 0; 95 | } 96 | return hetf; 97 | } 98 | 99 | 100 | // [[Rcpp::export]] 101 | NumericVector nafreq(RawMatrix x, int dim) { 102 | size_t k, l; 103 | if (dim == 1) { 104 | k = x.nrow(); 105 | l = x.ncol(); 106 | } else { 107 | k = x.ncol(); 108 | l = x.nrow(); 109 | } 110 | double na = 0; 111 | NumericVector naf(k); 112 | for (size_t i = 0; i < k; i ++) { 113 | for (size_t j = 0; j < l; j ++) { 114 | if (dim == 1) { 115 | if (x(i, j) == 0x00) 116 | na ++; 117 | } else { 118 | if (x(j, i) == 0x00) 119 | na ++; 120 | } 121 | } 122 | naf[i] = na / l; 123 | na = 0; 124 | } 125 | return naf; 126 | } 127 | --------------------------------------------------------------------------------