├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── AllClass.R
    ├── AllGeneric.R
    ├── RcppExports.R
    ├── associationTest.R
    ├── qtcatPackage.R
    ├── snpCluster.R
    ├── snpData.R
    └── snpImpute.R
├── README.md
├── appveyor.yml
├── inst
    └── extdata
    │   ├── phenodata.csv
    │   └── snpdata.csv
├── man
    ├── alleleFreq-snpMatrix-method.Rd
    ├── alleleFreq.Rd
    ├── as.matrix-snpMatrix-method.Rd
    ├── as.snpMatrix.Rd
    ├── clarans.Rd
    ├── cutClust.Rd
    ├── distCor.Rd
    ├── hetFreq-snpMatrix-method.Rd
    ├── hetFreq.Rd
    ├── identicals.Rd
    ├── imputeMedoids.Rd
    ├── imputeSnp.Rd
    ├── imputeSnpIter.Rd
    ├── imputeSnpMatrix.Rd
    ├── lmQtc.Rd
    ├── medoQtc.Rd
    ├── naFreq-snpMatrix-method.Rd
    ├── naFreq.Rd
    ├── plotQtc.Rd
    ├── plotSelFreq.Rd
    ├── qtcat-package.Rd
    ├── qtcatClust.Rd
    ├── qtcatGeno.Rd
    ├── qtcatHit.Rd
    ├── qtcatPheno.Rd
    ├── qtcatQtc.Rd
    ├── read.snpData.Rd
    ├── rename.leafs.Rd
    ├── snpInfo-qtcatHit-method.Rd
    ├── snpInfo-snpMatrix-method.Rd
    ├── snpInfo.Rd
    ├── snpMatrix-class.Rd
    └── sub-snpMatrix-ANY-ANY-missing-method.Rd
└── src
    ├── RcppExports.cpp
    ├── ReadData.cpp
    ├── snpCluster.cpp
    └── snpData.cpp


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | tests/*
4 | ^packrat/
5 | .Rprofile
6 | ^\.travis\.yml$
7 | ^appveyor\.yml$
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .Rprofile
 4 | .RData
 5 | qtcat.Rproj
 6 | src/*.o
 7 | src/*.so
 8 | src/*.dll
 9 | inst/doc
10 | packrat/lib*
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: r
 2 | cache: packages
 3 | 
 4 | matrix:
 5 |   include:
 6 |     - os: linux
 7 |     - os: osx
 8 | 
 9 | warnings_are_errors: true
10 | 
11 | # r_github_packages:
12 | #   - jrklasen/hit
13 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: qtcat
 2 | Title: Quantitative Trait Cluster Association Test
 3 | Description: 
 4 |     All SNPs are jointly associated to the phenotype and at the same time correlation among 
 5 |     them is considered. Thus, correction for population structure becomes unnecessary, 
 6 |     which in many cases results in a power advantages compared to other methods.
 7 | Authors@R: person("Jonas", "Klasen", role = c("aut", "cre"), email = "qtcat@gmx.de")
 8 | Version: 0.4.3
 9 | URL: http://github.com/QTCAT/qtcat
10 | BugReports: https://github.com/QTCAT/qtcat/issues
11 | Depends:
12 |     R (>= 3.2.0)
13 | Imports:
14 |     methods,
15 |     stats,
16 |     parallel,
17 |     graphics,
18 |     utils,
19 |     Rcpp (>= 0.11.0),
20 |     fastcluster (>= 1.1.16),
21 |     hit (>= 0.4.0)
22 | Suggests:
23 |     knitr,
24 |     rmarkdown
25 | LinkingTo: Rcpp
26 | License: GPL (>= 2)
27 | RoxygenNote: 6.0.1
28 | VignetteBuilder: knitr
29 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(alleleFreq)
 4 | export(as.snpMatrix)
 5 | export(clarans)
 6 | export(cutClust)
 7 | export(distCor)
 8 | export(hetFreq)
 9 | export(identicals)
10 | export(imputeSnpMatrix)
11 | export(lmQtc)
12 | export(medoQtc)
13 | export(naFreq)
14 | export(plotQtc)
15 | export(plotSelFreq)
16 | export(qtcatClust)
17 | export(qtcatGeno)
18 | export(qtcatHit)
19 | export(qtcatPheno)
20 | export(qtcatQtc)
21 | export(read.snpData)
22 | export(snpInfo)
23 | exportClasses(snpMatrix)
24 | exportMethods("[")
25 | exportMethods(alleleFreq)
26 | exportMethods(as.matrix)
27 | exportMethods(hetFreq)
28 | exportMethods(naFreq)
29 | exportMethods(snpInfo)
30 | importFrom(Rcpp,evalCpp)
31 | importFrom(fastcluster,hclust)
32 | importFrom(graphics,axis)
33 | importFrom(graphics,mtext)
34 | importFrom(graphics,plot)
35 | importFrom(hit,as.hierarchy)
36 | importFrom(hit,hit)
37 | importFrom(methods,is)
38 | importFrom(methods,new)
39 | importFrom(methods,setClass)
40 | importFrom(methods,setGeneric)
41 | importFrom(methods,setMethod)
42 | importFrom(methods,setOldClass)
43 | importFrom(methods,signature)
44 | importFrom(parallel,mclapply)
45 | importFrom(stats,as.dendrogram)
46 | importFrom(stats,cor)
47 | importFrom(stats,dendrapply)
48 | importFrom(stats,is.leaf)
49 | importFrom(stats,lm)
50 | importFrom(stats,na.omit)
51 | importFrom(stats,optimise)
52 | importFrom(stats,reorder)
53 | importFrom(utils,installed.packages)
54 | useDynLib(qtcat)
55 | 


--------------------------------------------------------------------------------
/R/AllClass.R:
--------------------------------------------------------------------------------
 1 | #' @title A S4 class to represent a SNP-matrix
 2 | #'
 3 | #' @description A S4 class to represent a SNP matrix. Storing SNP information, by using a
 4 | #' byte-level (raw) storage scheme, jointly with genomic position and allele information.
 5 | #'
 6 | #' @slot snpData a matrix of SNPs stored in type 'raw'. 00 is NA, 01 homozygote AA, 02
 7 | #' heterozygote AB, and 03 homozygote BB.
 8 | #' @slot snpInfo data.frame with four columns. The first col. contains the chromosomes,
 9 | #' the second col. the positions, the third col. the first allele and the fourth the second
10 | #' allele.
11 | #' @slot dim an integer vector with exactly two non-negative values.
12 | #' @slot dimnames a list of length two; each component containing NULL or a character vector
13 | #' length equal the corresponding dim element.
14 | #'
15 | #' @importFrom methods setClass
16 | #' @export
17 | setClass("snpMatrix",
18 |          slots = c(snpData = "matrix",
19 |                    snpInfo = "data.frame",
20 |                    dim = "integer",
21 |                    dimnames = "list"))
22 | 


--------------------------------------------------------------------------------
/R/AllGeneric.R:
--------------------------------------------------------------------------------
 1 | #' @title Extract genomic position and allele information.
 2 | #'
 3 | #' @description Extract genomic position and allele information from object.
 4 | #'
 5 | #' @param object an object, for which a corresponding method exists.
 6 | #'
 7 | #' @importFrom methods setGeneric
 8 | #' @export
 9 | setGeneric("snpInfo", function(object) standardGeneric("snpInfo"))
10 | 
11 | 
12 | #' @title  Allele Frequency.
13 | #'
14 | #' @description Frequency of alleles in data set.
15 | #'
16 | #' @param x an object, for which a corresponding method exists.
17 | #' @param maf logical, if true minor allele frequency (default), other ways allele
18 | #' frequency of 'allele.1'.
19 | #'
20 | #' @importFrom methods setGeneric
21 | #' @export
22 | setGeneric("alleleFreq", function(x, maf = TRUE) standardGeneric("alleleFreq"))
23 | 
24 | 
25 | #' @title Heterozygosity Frequency.
26 | #'
27 | #' @description Frequency of heterozygosity in data set.
28 | #'
29 | #' @param x an object, for which a corresponding method exists.
30 | #' @param dim dimension over which heterozygosity is estimated. 1 (default) is for rows
31 | #' (individuals), 2 is for columns (SNPs).
32 | #'
33 | #' @importFrom methods setGeneric
34 | #' @export
35 | setGeneric("hetFreq", function(x, dim = 1L) standardGeneric("hetFreq"))
36 | 
37 | 
38 | #' @title Missing Data Frequency.
39 | #'
40 | #' @description Frequency of missing Data in data set.
41 | #'
42 | #' @param x an object, for which a corresponding method exists.
43 | #' @param dim dimension over which heterozygosity is estimated. 1 (default) is for rows
44 | #' (individuals), 2 is for columns (SNPs).
45 | #'
46 | #' @importFrom methods setGeneric
47 | #' @export
48 | setGeneric("naFreq", function(x, dim = 1L) standardGeneric("naFreq"))
49 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | read_snpData <- function(file, sep, quote, rowNames, na_str, nrows) {
 5 |     .Call('_qtcat_read_snpData', PACKAGE = 'qtcat', file, sep, quote, rowNames, na_str, nrows)
 6 | }
 7 | 
 8 | corDist <- function(x, y) {
 9 |     .Call('_qtcat_corDist', PACKAGE = 'qtcat', x, y)
10 | }
11 | 
12 | corDists <- function(x) {
13 |     .Call('_qtcat_corDists', PACKAGE = 'qtcat', x)
14 | }
15 | 
16 | corPreIdenticals <- function(x, step) {
17 |     .Call('_qtcat_corPreIdenticals', PACKAGE = 'qtcat', x, step)
18 | }
19 | 
20 | corIdenticals <- function(x, clustIdx) {
21 |     .Call('_qtcat_corIdenticals', PACKAGE = 'qtcat', x, clustIdx)
22 | }
23 | 
24 | joinCorIdenticals <- function(n, preclust, ClustMedo) {
25 |     .Call('_qtcat_joinCorIdenticals', PACKAGE = 'qtcat', n, preclust, ClustMedo)
26 | }
27 | 
28 | corClarans <- function(x, k, maxNeigbours) {
29 |     .Call('_qtcat_corClarans', PACKAGE = 'qtcat', x, k, maxNeigbours)
30 | }
31 | 
32 | corMedoids <- function(x, clusters) {
33 |     .Call('_qtcat_corMedoids', PACKAGE = 'qtcat', x, clusters)
34 | }
35 | 
36 | design <- function(x) {
37 |     .Call('_qtcat_design', PACKAGE = 'qtcat', x)
38 | }
39 | 
40 | afreq <- function(x, maf) {
41 |     .Call('_qtcat_afreq', PACKAGE = 'qtcat', x, maf)
42 | }
43 | 
44 | hetfreq <- function(x, dim) {
45 |     .Call('_qtcat_hetfreq', PACKAGE = 'qtcat', x, dim)
46 | }
47 | 
48 | nafreq <- function(x, dim) {
49 |     .Call('_qtcat_nafreq', PACKAGE = 'qtcat', x, dim)
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/R/associationTest.R:
--------------------------------------------------------------------------------
  1 | #' @title A genotype object constructor
  2 | #'
  3 | #' @description Constructs a S3-object containing a SNP design matrix and a hierarchy. If
  4 | #' a SNP in the input object contains missing data, the clustering is used to impute
  5 | #' information from highly correlated neighbor SNPs. The genotype object is needed for
  6 | #' \code{\link{qtcatHit}} as input.
  7 | #'
  8 | #' @param snp an object of S4 class \linkS4class{snpMatrix}.
  9 | #' @param snpClust an object of class \code{\link{qtcatClust}}.
 10 | #' @param absCor a vector of absolute value of correlations considered in the hierarchy.
 11 | #' @param min.absCor a minimum absolute value of correlation which is considered. A value
 12 | #' in the range from 0 to 1.
 13 | #' @param mc.cores a number of cores for parallelising. The maximum is \code{'B'}. For
 14 | #' details see \code{\link[parallel]{mclapply}}.
 15 | #'
 16 | #' @examples
 17 | #' # file containing example data for SNP data
 18 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
 19 | #' snp <- read.snpData(gfile, sep = ",")
 20 | #' clust <- qtcatClust(snp)
 21 | #'
 22 | #' # Construct geotype object
 23 | #' geno <- qtcatGeno(snp, clust)
 24 | #'
 25 | #' @importFrom hit as.hierarchy
 26 | #' @importFrom methods is
 27 | #' @importFrom stats reorder
 28 | #' @export
 29 | qtcatGeno <- function(snp, snpClust, absCor, min.absCor = 0.5, mc.cores = 1) {
 30 |   stopifnot(is(snp, "snpMatrix"))
 31 |   stopifnot(is(snpClust, "qtcatClust"))
 32 |   if (!setequal(names(snpClust$clusters), colnames(snp)))
 33 |     stop("Names of 'snp' and 'snpClust' differ")
 34 |   if (missing(absCor))
 35 |     hier <- as.hierarchy(snpClust$dendrogram, 1 - min.absCor, names = colnames(snp))
 36 |   else
 37 |     hier <- as.hierarchy(snpClust$dendrogram, height = 1 - absCor, names = colnames(snp))
 38 |   if (any(naFreq(snp) > 0))
 39 |     snp <- imputeMedoids(snp, snpClust, hier, .25, mc.cores)
 40 |   if (is.null(names <- snpClust$medoids))
 41 |     names <- names(snpClust)
 42 |   snpnames <- colnames(snp)[colnames(snp) %in% names]
 43 |   desMat <- as.matrix(snp[, snpnames])
 44 |   hier <- reorder(hier, snpnames)
 45 |   out <- list(x = desMat,
 46 |               hierarchy = hier,
 47 |               clusters = snpClust$clusters,
 48 |               medoids = snpClust$medoids,
 49 |               snpInfo = snpInfo(snp))
 50 |   class(out) <- "qtcatGeno"
 51 |   out
 52 | }
 53 | 
 54 | #' @title A phenotype object constructor
 55 | #'
 56 | #' @description Constructs an S3-object containing phenotype and if additional covariats
 57 | #' exist a design matrix of those. The phenotype object is needed as input for
 58 | #' \code{\link{qtcatHit}}.
 59 | #'
 60 | #' @param names a vector of individual names of length 'n'.
 61 | #' @param pheno a vector of length 'n' or a matrix size 'n x 2' in case of binomial family.
 62 | #' This contains the phenotypic observations.
 63 | #' @param family a character string specifying the family of the phenotype distribution.
 64 | #' Either "gaussian" (default) or "binomial".
 65 | #' @param covariates a matrix typically generated by a call of
 66 | #' \code{\link[stats]{model.matrix}}. It contain additional variables influencing the
 67 | #' phenotype e.g. environmental and experimental covariates.
 68 | #'
 69 | #' @examples
 70 | #' # file containing example data for a phenotype.
 71 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
 72 | #' pdat <- read.csv(pfile, header = TRUE)
 73 | #'
 74 | #' # Construct phenotype object
 75 | #' pheno <- qtcatPheno(names = pdat[, 1],
 76 | #'                     pheno = pdat[, 2],
 77 | #'                     covariates = model.matrix(~ pdat[, 3]))
 78 | #'
 79 | #' @export
 80 | qtcatPheno <- function(names, pheno, family = "gaussian", covariates = NULL) {
 81 |   match.arg(family, c("gaussian", "binomial"))
 82 |   nn <- length(names)
 83 |   if (any(is.na(pheno)))
 84 |     stop("Missing values in 'pheno' are not allowed")
 85 |   if (!is.null(covariates)) {
 86 |     if (is.matrix(covariates)) {
 87 |       nc <- nrow(covariates)
 88 |       if (all(covariates[, 1] == 1))
 89 |         covariates <- covariates[, -1, drop = FALSE]
 90 |     } else {
 91 |       stop("'covariates' has to be a 'matrix'")
 92 |     }
 93 |   } else {
 94 |     covariates <- matrix(nrow = nn, ncol = 0L)
 95 |     nc <- nn
 96 |   }
 97 |   if (is.vector(pheno)) {
 98 |     np <- length(pheno)
 99 |   } else if (is.matrix(pheno) && family == "binomial") {
100 |     if (ncol(pheno) > 2)
101 |       stop("'phone' has more than two columns which is not allowed")
102 |     np <- nrow(pheno)
103 |   } else {
104 |     stop("'pheno' has to be a 'vector' or in in case of binomial family a 'matrix'")
105 |   }
106 |   if (nn == nc && nn == np)
107 |     out <- list(names = as.character(names),
108 |                 pheno = pheno,
109 |                 covariates = covariates,
110 |                 family = family)
111 |   class(out) <- "qtcatPheno"
112 |   out
113 | }
114 | 
115 | 
116 | #' @title Fitting Hierarchical Inference Testing
117 | #'
118 | #' @description Hierarchical inference testing for phenotype-SNP association.
119 | #'
120 | #' @param pheno an object of class \code{\link{qtcatPheno}}.
121 | #' @param geno an object of class \code{\link{qtcatGeno}}.
122 | #' @param B a integer indicating the number of sample-splits.
123 | #' @param p.samp1 a value specifying the fraction of data used for the LASSO sample-split.
124 | #' The ANOVA sample-split is \code{1 - p.samp1}.
125 | #' @param nfolds Number of folds (default is 5). See \code{\link[glmnet]{cv.glmnet}} for
126 | #' more details.
127 | #' @param overall.lambda 	Logical, if true, lambda is estimated once, if false (default),
128 | #' lambda is estimated for each sample split.
129 | #' @param lambda.opt a criterion for optimum selection of cross validated lasso. Either
130 | #' "lambda.1se" (default) or "lambda.min". See
131 | #' \code{\link[glmnet]{cv.glmnet}} for more details.
132 | #' @param alpha a single value in the range of 0 to 1 for the elastic net mixing parameter.
133 | #' @param gamma a vector of gamma-values used in significance estimation.
134 | #' @param max.p.esti a maximum for computed p-values. All p-values above this value are set
135 | #' to one. Small \code{max.p.esti} values reduce computing time.
136 | #' @param seed a RNG seed, see \code{\link{set.seed}}.
137 | #' @param mc.cores a number of cores for parallelising. The maximum is
138 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}.
139 | #' @param trace logical, if \code{TRUE} it prints the current status of the program.
140 | #' @param ... additional arguments for \code{\link[glmnet]{cv.glmnet}}.
141 | #'
142 | #' @examples
143 | #' # If you want to run the examples, use:
144 | #' # example(qtcatHit, run.dontrun = TRUE)
145 | #' \dontrun{
146 | #' # files containing example data for SNP data and the phenotype
147 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
148 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
149 | #' pdat <- read.csv(pfile, header = TRUE)
150 | #' snp <- read.snpData(gfile, sep = ",")
151 | #' clust <- qtcatClust(snp)
152 | #' geno <- qtcatGeno(snp, clust)
153 | #' pheno <- qtcatPheno(names = pdat[, 1],
154 | #'                     pheno = pdat[, 2],
155 | #'                     covariates = model.matrix(~ pdat[, 3]))
156 | #'
157 | #' # fitting HIT
158 | #' fitted <- qtcatHit(pheno, geno)
159 | #' }
160 | #'
161 | #' @importFrom methods is
162 | #' @export
163 | qtcatHit <- function(pheno, geno, B = 50, p.samp1 = 0.35,
164 |                      nfolds = 5, overall.lambda = FALSE, lambda.opt = "lambda.1se",
165 |                      alpha = 1, gamma = seq(0.05, 0.99, by = 0.01),
166 |                      max.p.esti = 1, seed = 12321, mc.cores = 1, trace = FALSE, ...) {
167 |   set.seed(seed)
168 |   on.exit(set.seed(NULL))
169 |   stopifnot(is(pheno, "qtcatPheno"))
170 |   stopifnot(is(geno, "qtcatGeno"))
171 |   id <- intersect(pheno$names, rownames(geno$x))
172 |   if (!length(id))
173 |     stop("The ID intersect of 'pheno' and 'geno' is emty")
174 |   if (length(id.uniqueGeno <- setdiff(rownames(geno$x), id)))
175 |     cat("The following individuals are part of 'geno' but not of 'pheno':\n",
176 |         paste(id.uniqueGeno, collapse = " "), "\n")
177 |   if (length(id.uniquePheno <- setdiff(pheno$names, id)))
178 |     cat("The following individuals are part of 'pheno' but not of 'geno':\n",
179 |         paste(id.uniquePheno, collapse = " "), "\n")
180 |   phenoInx <- which(pheno$names %in% id)
181 |   genoInx <- match(pheno$names[phenoInx], rownames(geno$x))
182 |   if (ncol(pheno$covariates) == 0L)
183 |     x <- geno$x[genoInx, ]
184 |   else
185 |     x <- cbind(geno$x[genoInx, ], pheno$covariates[phenoInx, ])
186 |   y <- pheno$pheno[phenoInx]
187 |   fitHit <- hit(x, y, geno$hierarchy, pheno$family,
188 |                 B, p.samp1, nfolds, overall.lambda, lambda.opt, alpha,
189 |                 gamma, max.p.esti, mc.cores,
190 |                 trace, standardize = FALSE)
191 |   out <- c(fitHit, geno[3L:5L])
192 |   class(out) <- c("qtcatHit", "hit")
193 |   out
194 | }
195 | 
196 | 
197 | #' @title Summarize results of Hierarchical Inference Test
198 | #'
199 | #' @description Summarizing the QTCs (significant cluster of SNPs) and their position at
200 | #' the genome.
201 | #'
202 | #' @param object an object of class \code{\link{qtcatHit}}.
203 | #' @param alpha an alpha level for significance estimation.
204 | #' @param min.absCor a minimum absolute value of correlation to be considered.
205 | #'
206 | #' @examples
207 | #' # If you want to run the examples, use:
208 | #' # example(qtcatQtc, run.dontrun = TRUE)
209 | #' \dontrun{
210 | #' # files containing example data for SNP data and the phenotype
211 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
212 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
213 | #' pdat <- read.csv(pfile, header = TRUE)
214 | #' snp <- read.snpData(gfile, sep = ",")
215 | #' clust <- qtcatClust(snp)
216 | #' geno <- qtcatGeno(snp, clust)
217 | #' pheno <- qtcatPheno(names = pdat[, 1],
218 | #'                     pheno = pdat[, 2],
219 | #'                     covariates = model.matrix(~ pdat[, 3]))
220 | #' fitted <- qtcatHit(pheno, geno)
221 | #'
222 | #' # Summarizing the QTCs (loci37, loci260, and loci367 are causal)
223 | #' qtcatQtc(fitted)
224 | #' }
225 | #'
226 | #' @importFrom hit hit
227 | #' @importFrom methods is
228 | #' @export
229 | qtcatQtc <- function(object, alpha = 0.05, min.absCor = 0.05) {
230 |   stopifnot(is(object, "qtcatHit"))
231 |   y <- summary(object, alpha, 1 - min.absCor)
232 |   signames <- rownames(y)
233 |   sigclust <- match(signames, object$medoids)
234 |   sigClust <- matrix(0, length(object$clusters), 3L)
235 |   sigClust[, 2L] <- 1
236 |   for (i in seq_along(signames)) {
237 |     inx <- which(object$clusters == sigclust[i])
238 |     sigClust[inx, ] <- matrix(unlist(y[signames[i], ]),
239 |                               length(inx), 3L, byrow = TRUE)
240 |   }
241 |   rownames(sigClust) <- names(object$clusters)
242 |   sigClust[, 2L] <- 1 - sigClust[, 2L]
243 |   colnames(sigClust) <- c(colnames(y)[1L], "absCor", colnames(y)[3L])
244 |   sigClust <- as.data.frame(sigClust[sigClust[, 1L] != 0, ,drop = FALSE])
245 |   out <- cbind(snpInfo(object)[rownames(sigClust), 1:2], sigClust)
246 |   out
247 | }
248 | 
249 | 
250 | setOldClass("qtcatHit")
251 | #' @title Get position from qtcatHit object
252 | #'
253 | #' @description Genetic position info from an object of class qtcatHit.
254 | #'
255 | #' @param object an object of class \code{\link{qtcatHit}}.
256 | #'
257 | #'
258 | #' @importFrom methods setMethod setOldClass
259 | #' @export
260 | setMethod("snpInfo", "qtcatHit",
261 |           function(object) {
262 |             out <- object$snpInfo
263 |             if (is.null(out)) {
264 |               cat("No position information available")
265 |             }
266 |             out
267 |           }
268 | )
269 | 
270 | 
271 | #' @title Find medoids of QTCs
272 | #' @description Find a medoid of for each quantitative trait cluster (QTC).
273 | #'
274 | #' @param object an object of class \code{\link{qtcatHit}}.
275 | #' @param geno an object of class \code{\link{qtcatGeno}}.
276 | #' @param alpha an alpha level for significance estimation.
277 | #' @param min.absCor minimum absolute value of correlation considered.
278 | #'
279 | #' @examples
280 | #' # If you want to run the examples, use:
281 | #' # example(medoQtc, run.dontrun = TRUE)
282 | #' \dontrun{
283 | #' # files containing example data for SNP data and the phenotype
284 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
285 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
286 | #' pdat <- read.csv(pfile, header = TRUE)
287 | #' snp <- read.snpData(gfile, sep = ",")
288 | #' clust <- qtcatClust(snp)
289 | #' geno <- qtcatGeno(snp, clust)
290 | #' pheno <- qtcatPheno(names = pdat[, 1],
291 | #'                     pheno = pdat[, 2],
292 | #'                     covariates = model.matrix(~ pdat[, 3]))
293 | #' fitted <- qtcatHit(pheno, geno)
294 | #'
295 | #' # QTC medoids
296 | #' medo <- medoQtc(fitted, geno)
297 | #' }
298 | #'
299 | #' @importFrom methods is
300 | #' @importFrom stats cor
301 | #' @export
302 | medoQtc <- function(object, geno, alpha = 0.05, min.absCor = 0.05) {
303 |   stopifnot(is(object, "qtcatHit"))
304 |   stopifnot(is(geno, "qtcatGeno"))
305 |   sigClust <- summary(object, alpha, min.absCor)
306 |   if (nrow(sigClust)) {
307 |     clusters <- split(rownames(sigClust), sigClust$clusters)
308 |     medoids <- sapply(clusters, function(names, geno) {
309 |       if (length(names) > 1L)
310 |         return(names[which.max(abs(cor(geno$x[, names])))])
311 |       else
312 |         return(names)
313 |     }, geno = geno)
314 |   } else {
315 |     medoids <- c()
316 |   }
317 |   medoids
318 | }
319 | 
320 | 
321 | #' @title Fitting a Linear Model to QTCs
322 | #'
323 | #' @description Linear model between phenotype and medoids of QTCs (significant SNP
324 | #' clusters).
325 | #'
326 | #' @param object an object of class \code{\link{qtcatHit}}.
327 | #' @param pheno an object of class \code{\link{qtcatPheno}}.
328 | #' @param geno an object of class \code{\link{qtcatGeno}}.
329 | #' @param alpha an alpha level for significance estimation.
330 | #' @param min.absCor minimum absolute value of correlation considered.
331 | #'
332 | #' @examples
333 | #' # If you want to run the examples, use:
334 | #' # example(lmQtc, run.dontrun = TRUE)
335 | #' \dontrun{
336 | #' # files containing example data for SNP data and the phenotype
337 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
338 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
339 | #' pdat <- read.csv(pfile, header = TRUE)
340 | #' snp <- read.snpData(gfile, sep = ",")
341 | #' clust <- qtcatClust(snp)
342 | #' geno <- qtcatGeno(snp, clust)
343 | #' pheno <- qtcatPheno(names = pdat[, 1],
344 | #'                     pheno = pdat[, 2],
345 | #'                     covariates = model.matrix(~ pdat[, 3]))
346 | #' fitted <- qtcatHit(pheno, geno)
347 | #'
348 | #' # fitting a LM to the phenotype and QTC medoids
349 | #' lmfitted <- lmQtc(fitted, pheno, geno)
350 | #' }
351 | #'
352 | #' @importFrom methods is
353 | #' @importFrom stats lm
354 | #' @export
355 | lmQtc <- function(object, pheno, geno, alpha = 0.05, min.absCor = 0.05) {
356 |   stopifnot(is(object, "qtcatHit"))
357 |   stopifnot(is(pheno, "qtcatPheno"))
358 |   stopifnot(is(geno, "qtcatGeno"))
359 |   id <- intersect(pheno$names, rownames(geno$x))
360 |   phenoInx <- which(pheno$names %in% id)
361 |   if (!length(id))
362 |     stop("The ID intersect of 'pheno' and 'geno' is emty")
363 |   if (length(id.uniqueGeno <- setdiff(rownames(geno$x), id)))
364 |     cat("The following individuals are part of 'geno' but not of 'pheno':\n",
365 |         paste(id.uniqueGeno, collapse = " "), "\n")
366 |   if (length(id.uniquePheno <- setdiff(pheno$names, id)))
367 |     cat("The following individuals are part of 'pheno' but not of 'geno':\n",
368 |         paste(id.uniquePheno, collapse = " "), "\n")
369 |   medoids <- medoQtc(object, geno, alpha, min.absCor)
370 |   if (length(medoids)) {
371 |     xg <- geno$x[, colnames(geno$x) %in% medoids, drop = FALSE]
372 |     genoInx <- match(pheno$names[phenoInx], rownames(xg))
373 |     rownames(xg) <- NULL
374 |     if (ncol(pheno$covariates)) {
375 |       dat <- data.frame(y = pheno$pheno[phenoInx],
376 |                         pheno$covariates[phenoInx, ],
377 |                         xg[genoInx, ])
378 |     } else {
379 |       dat <- data.frame(y = pheno$pheno[phenoInx], xg[genoInx, ])
380 |     }
381 |   } else if (ncol(pheno$covariates)) {
382 |     dat <- data.frame(y = pheno$pheno[phenoInx],
383 |                       pheno$covariates[phenoInx, ])
384 |   } else {
385 |     dat <- data.frame(y = pheno$pheno[phenoInx])
386 |   }
387 |   out <- lm(y ~ ., data = dat)
388 |   out
389 | }
390 | 
391 | 
392 | #' @title Plot resulting QTCs of the Hierarchical Inference Test
393 | #'
394 | #' @description Plot the QTCs (significant cluster of SNPs) at their
395 | #' position at the genome.
396 | #'
397 | #' @param x an object of class \code{\link{qtcatHit}}.
398 | #' @param alpha an alpha level for significance estimation.
399 | #' @param xlab a title for the x axis.
400 | #' @param ylab a title for the y axis.
401 | #' @param col.axis colors for axis line, tick marks, and title respectively.
402 | #' @param ... other graphical parameters may also be passed as arguments to this function.
403 | #'
404 | #' @examples
405 | #' # If you want to run the examples, use:
406 | #' # example(plotQtc, run.dontrun = TRUE)
407 | #' \dontrun{
408 | #' # files containing example data for SNP data and the phenotype
409 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
410 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
411 | #' pdat <- read.csv(pfile, header = TRUE)
412 | #' snp <- read.snpData(gfile, sep = ",")
413 | #' clust <- qtcatClust(snp)
414 | #' geno <- qtcatGeno(snp, clust)
415 | #' pheno <- qtcatPheno(names = pdat[, 1],
416 | #'                     pheno = pdat[, 2],
417 | #'                     covariates = model.matrix(~ pdat[, 3]))
418 | #' fitted <- qtcatHit(pheno, geno)
419 | #'
420 | #' # Plot the QTCs (loci37, loci260, and loci367 are causal)
421 | #' plotQtc(fitted)
422 | #' }
423 | #'
424 | #' @importFrom graphics plot axis mtext
425 | #' @export
426 | plotQtc <- function(x, alpha = 0.05, xlab = "Chromosomes",
427 |                     ylab = expression(-log[10](italic(p))), col.axis = NULL, ...) {
428 |   stopifnot(is(x, "qtcatHit"))
429 |   # make positions linear with gaps between chr's
430 |   pos <- snpInfo(x)[, 1:2]
431 |   chrminmax <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
432 |   chrgap <- sum(chrminmax[2L, ]) * .01
433 |   chrsize <- cumsum(c(0, chrminmax[2L, -ncol(chrminmax)])) -
434 |     cumsum(chrminmax[1L, ]) +
435 |     cumsum(c(0, rep(chrgap, ncol(chrminmax) - 1L)))
436 |   chr <- sort(unique(pos[, 1L]))
437 |   for (i in seq_along(chr)) {
438 |     inx <- which(pos[, 1L] == chr[i])
439 |     pos[inx, 2L] <- pos[inx, 2L] + chrsize[i]
440 |   }
441 |   chrstartend <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
442 |   xlim <- c(chrstartend[1L, 1L] - chrgap, chrstartend[2L, ncol(chrstartend)] + chrgap)
443 |   # get QTCs from result
444 |   qtc <- qtcatQtc(x, alpha = alpha, min.absCor = .01)
445 |   for (i in seq_along(chr)) {
446 |     inx2 <- which(qtc[, 1L] == chr[i])
447 |     qtc[inx2, 2L] <- qtc[inx2, 2L] + chrsize[i]
448 |   }
449 |   qtc$pValues[qtc$pValues] <- qtc$pValues[qtc$pValues] + 1e-308
450 |   qtc$log.pValues <- -log10(qtc$pValues)
451 |   # plot
452 |   plot(qtc$pos, qtc$log.pValues,  xlim = xlim, ylim = c(0, max(9, qtc$log.pValues) + 1),
453 |        axes = FALSE, xlab = "", ylab = "", ...)
454 |   # x
455 |   for (i in seq_along(chr))
456 |     axis(1, labels = FALSE, at = c(chrstartend[1, i], chrstartend[2, i]), col = col.axis)
457 |   axis(1, at = colMeans(chrstartend), labels = chr, col = NA, col.axis = col.axis)
458 |   mtext(xlab, 1, 2.5, col = col.axis)
459 |   # y
460 |   axis(2, col.axis = col.axis, col = col.axis)
461 |   mtext(expression(-log[10](italic(p))), 2, 2.5, col = col.axis)
462 | }
463 | 
464 | 
465 | #' @title Plot markers selection frequencies of the Hierarchical Inference Test
466 | #'
467 | #' @description Plot markers selection frequencies at their
468 | #' position at the genome.
469 | #'
470 | #' @param x an object of class \code{\link{qtcatHit}}.
471 | #' @param xlab a title for the x axis.
472 | #' @param ylab a title for the y axis.
473 | #' @param col.axis colors for axis line, tick marks, and title respectively.
474 | #' @param ... other graphical parameters may also be passed as arguments to this function.
475 | #'
476 | #' @examples
477 | #' # If you want to run the examples, use:
478 | #' # example(plotSelFreq, run.dontrun = TRUE)
479 | #' \dontrun{
480 | #' # files containing example data for SNP data and the phenotype
481 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
482 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
483 | #' pdat <- read.csv(pfile, header = TRUE)
484 | #' snp <- read.snpData(gfile, sep = ",")
485 | #' clust <- qtcatClust(snp)
486 | #' geno <- qtcatGeno(snp, clust)
487 | #' pheno <- qtcatPheno(names = pdat[, 1],
488 | #'                     pheno = pdat[, 2],
489 | #'                     covariates = model.matrix(~ pdat[, 3]))
490 | #' fitted <- qtcatHit(pheno, geno)
491 | #'
492 | #' # Plot the selection frequncy of markers (loci37, loci260, and loci367 are causal)
493 | #' plotSelFreq(fitted)
494 | #' }
495 | #'
496 | #' @importFrom graphics plot axis mtext
497 | #' @export
498 | plotSelFreq <- function(x, xlab = "Chromosomes", ylab = "Sel. freq.",
499 |                         col.axis = NULL, ...) {
500 |   stopifnot(is(x, "qtcatHit"))
501 |   # make positions linear with gaps between chr's
502 |   pos <- snpInfo(x)[, 1:2]
503 |   chrminmax <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
504 |   chrgap <- sum(chrminmax[2L, ]) * .01
505 |   chrsize <- cumsum(c(0, chrminmax[2L, -ncol(chrminmax)])) -
506 |     cumsum(chrminmax[1L, ]) +
507 |     cumsum(c(0, rep(chrgap, ncol(chrminmax) - 1L)))
508 |   chr <- sort(unique(pos[, 1L]))
509 |   for (i in seq_along(chr)) {
510 |     inx <- which(pos[, 1L] == chr[i])
511 |     pos[inx, 2L] <- pos[inx, 2L] + chrsize[i]
512 |   }
513 |   chrstartend <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
514 |   xlim <- c(chrstartend[1L, 1L] - chrgap, chrstartend[2L, ncol(chrstartend)] + chrgap)
515 |   # hit lasso selection fequency
516 |   inx <- which(x$selectFreq > 0)
517 |   selfreq <- cbind(pos[names(x$hier), 2L][inx], x$selectFreq[inx])
518 |   # hit lasso selection fequency plot
519 |   plot(selfreq, xlim = xlim, ylim = c(0, 1.1), axes = FALSE, xlab = "", ylab = "", ...)
520 |   # x
521 |   for (i in seq_along(chr))
522 |     axis(1, labels = FALSE, at = c(chrstartend[1, i], chrstartend[2, i]), col = col.axis)
523 |   axis(1, at = colMeans(chrstartend), labels = chr, col = NA, col.axis = col.axis)
524 |   mtext(xlab, 1, 2.5, col = col.axis)
525 |   # y
526 |   axis(2, at = c(0, .5,  1), col = col.axis, col.axis = col.axis)
527 |   mtext(ylab, 2, 2.5, col = col.axis)
528 | }
529 | 


--------------------------------------------------------------------------------
/R/qtcatPackage.R:
--------------------------------------------------------------------------------
 1 | #' @title Quantitative Trait Cluster Association Test
 2 | #'
 3 | #' @description All SNPs are jointly associated to the phenotype and at the same time
 4 | #' correlation among them is considered. Thus, correction for population structure becomes
 5 | #' unnecessary, which in many cases results in a power advantages compared to other methods.
 6 | #'
 7 | #' @author Jonas Klasen
 8 | #'
 9 | #' @useDynLib qtcat
10 | #' @importFrom Rcpp evalCpp
11 | #'
12 | #' @docType package
13 | #' @name qtcat-package
14 | #' @aliases qtcat
15 | NULL
16 | 


--------------------------------------------------------------------------------
/R/snpCluster.R:
--------------------------------------------------------------------------------
  1 | #' @title Correlation based distance between SNPs
  2 | #'
  3 | #' @description This function computes a distance matrix. The distance is estimated
  4 | #' as one minus the absolute value of the correlation coefficient \code{1 - abs(cor)}.
  5 | #'
  6 | #' @param snp an object of class \linkS4class{snpMatrix}.
  7 | #'
  8 | #' @details See \code{\link[stats]{dist}} for details about the output object.
  9 | #' @seealso \code{\link[stats]{dist}}
 10 | #'
 11 | #' @examples
 12 | #' # file containing example data for SNP data
 13 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
 14 | #' snp <- read.snpData(gfile, sep = ",")
 15 | #'
 16 | #' dist <- distCor(snp[, 1:10])
 17 | #'
 18 | #' @importFrom methods is
 19 | #' @export
 20 | distCor <- function(snp) {
 21 |   stopifnot(is(snp, "snpMatrix"))
 22 |   out <- corDists(snp@snpData)
 23 |   attr(out,"Labels") <- colnames(snp)
 24 |   attr(out,"Size") <- ncol(snp)
 25 |   attr(out,"Diag") <- FALSE
 26 |   attr(out,"Upper") <- FALSE
 27 |   attr(out,"method") <- "1-abs(cor(snp))"
 28 |   attr(out,"call") <- match.call()
 29 |   class(out) <- "dist"
 30 |   out
 31 | }
 32 | 
 33 | 
 34 | #' @title Perfect simiarity clusters of SNP
 35 | #'
 36 | #' @description Finds perfect similarity cluster of SNPs. This is specially usfull in
 37 | #' artificial crossing populations.
 38 | #'
 39 | #' @param snp an object of class \linkS4class{snpMatrix}.
 40 | #' @param mc.cores a positive integer for the number of cores for parallel computing. See
 41 | #' \code{\link[parallel]{mclapply}} for details.
 42 | #'
 43 | #' @examples
 44 | #' # file containing example data for SNP data
 45 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
 46 | #' snp <- read.snpData(gfile, sep = ",")
 47 | #'
 48 | #' ident <- identicals(snp)
 49 | #'
 50 | #' @importFrom parallel mclapply
 51 | #' @importFrom methods is
 52 | #' @importFrom stats optimise
 53 | #' @export
 54 | identicals <- function(snp, mc.cores = 1) {
 55 |   stopifnot(is(snp, "snpMatrix"))
 56 |   p <- ncol(snp@snpData)
 57 |   s <- optimise(function(s, p, m) p * s + p / s * (p / s - 1) / 2 * s / m,
 58 |                 interval = c(2, p - 1), p = p, m = mc.cores)$minimum
 59 |   step <- as.integer(p / (s + 1))
 60 |   preclust <- unlist(corPreIdenticals(snp@snpData, step), FALSE)
 61 |   kidenticals <- mclapply(preclust, function(i, snp) corIdenticals(snp, i),
 62 |                          snp = snp@snpData, mc.cores = mc.cores)
 63 |   identclust <- joinCorIdenticals(p, preclust, kidenticals)
 64 |   clust <- identclust[[1L]]
 65 |   names(clust) <- colnames(snp)
 66 |   out <- list(clusters = clust,
 67 |               medoids = colnames(snp)[identclust[[2L]] + 1])
 68 |   class(out) <- "identicals"
 69 |   out
 70 | }
 71 | 
 72 | 
 73 | #' @title K-medoids clustering of SNPs using randomized search
 74 | #'
 75 | #' @description Partitioning (clustering) into k clusters "around medoids" by randomized
 76 | #' search. \code{1-abs(cor)} is used as distance between SNPs.
 77 | #'
 78 | #' @param snp an object of class \linkS4class{snpMatrix}.
 79 | #' @param k a positive integer specifying the number of clusters, has to be greater than
 80 | #' one and less than the number of SNPs.
 81 | #' @param maxNeigbours a positive integer specifying the maximum number of randomized
 82 | #' searches.
 83 | #' @param nLocal a positive integer specifying the number of optimisation runs.
 84 | #' @param mc.cores a positive integer for the number of cores for parallel computing. See
 85 | #' \code{\link[parallel]{mclapply}} for details.
 86 | #'
 87 | #' @details The K-medoids clustering is implemented as clustering large applications based
 88 | #' upon randomized search (CLARANS) algorithm (Ng and Han 2002). CLARANS is a modification
 89 | #' of the partitioning around medoids (PAM) algorithm \code{\link[cluster]{pam}}. Where the
 90 | #' PAM algorithm is estimating all distances between SNPs and the respective medoids,
 91 | #' CLARANS is searching a random subset of the SNPs. This is independently repeated several
 92 | #' times and the result which minimises the average distance the most is reported. This
 93 | #' produces results close to those of the PAM algorithm (Ng and Han 2002), though the
 94 | #' number of runs and the subset size have to be arbitrarily chosen by the user. The
 95 | #' algorithm has two advantages: (i) the number of distance comparisons is dramatically
 96 | #' reduced; and (ii) parallelizing is straightforward.
 97 | #'
 98 | #' @references Ng and J. Han (2002). CLARANS: A method for clustering objects for spatial
 99 | #' data mining. \emph{IEEE Transactions on Knowledge and Data Engineering}.
100 | #' \url{http://dx.doi.org/10.1109/TKDE.2002.1033770}).
101 | #'
102 | #' @examples
103 | #' # file containing example data for SNP data
104 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
105 | #' snp <- read.snpData(gfile, sep = ",")
106 | #'
107 | #' clust <- clarans(snp, 3)
108 | #'
109 | #' @importFrom parallel mclapply
110 | #' @importFrom methods is
111 | #' @export
112 | clarans <- function(snp, k, maxNeigbours = 100, nLocal = 10, mc.cores = 1) {
113 |   stopifnot(is(snp, "snpMatrix"))
114 |   if (missing(k))
115 |     stop("'k' must be specifid")
116 |   if (k < 2L)
117 |     stop("'k' must be at least two")
118 |   # cluster optimisation by clarans in parallel
119 |   clarans.i <- function(i, snp, k, maxNeigbours) {
120 |     # cluster optimisation by clarans
121 |     out <- corClarans(snp@snpData, k, maxNeigbours)
122 |     out
123 |   }
124 |   out.nLocal <- mclapply(1L:nLocal, clarans.i,
125 |                          snp, k, maxNeigbours,
126 |                          mc.cores = mc.cores)
127 |   opt.func <- function(i, snp) {snp[[i]][[3L]]}
128 |   all.objectives <- sapply(1:nLocal, opt.func, out.nLocal)
129 |   out.opt <- out.nLocal[[which.min(all.objectives)]]
130 |   clusters <- out.opt[[1L]]
131 |   names(clusters) <- colnames(snp)
132 |   medoids <- out.opt[[2L]] + 1
133 |   names(medoids) <- colnames(snp)[medoids]
134 |   # output
135 |   out <- list(clusters = clusters,
136 |               medoids = medoids,
137 |               objective = out.opt[[3L]],
138 |               all.objectives = all.objectives)
139 |   class(out) <- "k-medoids"
140 |   out
141 | }
142 | 
143 | 
144 | #' @title Hierarchical clustering for big SNP data sets.
145 | #'
146 | #' @description A three step approximated hierarchical clustering of SNPs suitable to
147 | #' large data sets.
148 | #'
149 | #' @param snp an object of class \linkS4class{snpMatrix}.
150 | #' @param k a positive integer specifying the number of clusters, less than the number of
151 | #' observations.
152 | #' @param identicals logical, if zero clustering.
153 | #' @param maxNeigbours a positive integer, specifying the maximum number of randomized
154 | #' searches.
155 | #' @param nLocal a positive integer, specifying the number of optimisation runs. Columns
156 | #' have to be similar to \code{snp}.
157 | #' @param method see hclust.
158 | #' @param mc.cores a number of cores for parallel computing. See \code{mclapply} in package
159 | #' parallel for details.
160 | #' @param trace logical, if \code{TRUE} it prints current status of the program.
161 | #' @param ... additional argruments for \code{\link[fastcluster]{hclust}}
162 | #'
163 | #' @seealso clarans
164 | #'
165 | #' @examples
166 | #' # file containing example data for SNP data
167 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
168 | #' snp <- read.snpData(gfile, sep = ",")
169 | #'
170 | #' clust <- qtcatClust(snp)
171 | #'
172 | #' @importFrom fastcluster hclust
173 | #' @importFrom stats as.dendrogram
174 | #' @importFrom parallel mclapply
175 | #' @importFrom methods is
176 | #' @importFrom utils installed.packages
177 | #' @export
178 | qtcatClust <- function(snp, k, identicals = TRUE, maxNeigbours = 100, nLocal = 10,
179 |                        method = "complete", mc.cores = 1, trace = FALSE, ...) {
180 |   stopifnot(is(snp, "snpMatrix"))
181 |   if (identicals) {
182 |     # identicals
183 |     if (trace)
184 |       cat("Step 1: Search for identicals is running\n")
185 |     identicalFit <- identicals(snp, mc.cores)
186 |     snp <- snp[, identicalFit$medoids]
187 |   } else if (trace) {
188 |     cat("Step 1: Search for identicals is switch off\n")
189 |   }
190 |   # CLARANS
191 |   if (missing(k))
192 |     k <- as.integer(ncol(snp) / 10000L)
193 |   if (k >= 2L) {
194 |     if (identicals && length(identicalFit$medoids) <= k * 2)
195 |       stop("Number of medoids from pefect correlated clustering is < k * 2")
196 |     if (trace)
197 |       cat("Step 2: CLARANS is running, 'k' is:", k, "\n")
198 |     clarFit <- clarans(snp, k, maxNeigbours, nLocal, mc.cores)
199 |     if (trace)
200 |       cat("   objectives:",
201 |           format(clarFit$all.objectives, sort = TRUE, digits = 4), "\n")
202 |     # if cluster < 2 add to a bigger cluster
203 |     clust.inx <- seq_len(k)
204 |     cluster.size <- rep(NA, k)
205 |     for (i in clust.inx)
206 |       cluster.size[i] <- sum(clarFit$clusters == i)
207 |     smallclust  <- which(cluster.size < 2)
208 |     if (length(smallclust)) {
209 |       clust.inx <- clust.inx[-smallclust]
210 |       min.bigclust <- clust.inx[which.min(min(cluster.size[clust.inx]))]
211 |       clarFit$clusters[clarFit$clusters %in% smallclust] <- min.bigclust
212 |     }
213 |     # HClust
214 |     if (trace)
215 |       cat("Step 3: HClust is running\n")
216 |     hclust.sub <- function(i, snp, clarFit, method, ...) {
217 |       inx.i <- which(clarFit$clusters == i)
218 |       out <- as.dendrogram(hclust(distCor(snp[, inx.i]), method, ...))
219 |       out
220 |     }
221 |     hclustFit <- mclapply(clust.inx, hclust.sub,
222 |                           snp, clarFit, method, ...,
223 |                           mc.cores = mc.cores)
224 |     dendro <- do.call(merge, c(hclustFit, height = 1, adjust = "add.max"))
225 |   } else {
226 |     # HClust
227 |     if (trace)
228 |       cat("Step 2: CLARANS is switch off\nStep 3: HClust is running\n")
229 |     dendro <- as.dendrogram(hclust(distCor(snp), method, ...))
230 |   }
231 |   if (identicals) {
232 |     out <- list(dendrogram = dendro,
233 |                 clusters = identicalFit$clusters,
234 |                 medoids = identicalFit$medoids)
235 |   } else {
236 |     medos <- labels(dendro)
237 |     clust <- 1:length(medos)
238 |     names(clust) <- medos
239 |     out <- list(dendrogram = dendro,
240 |                 clusters = clust,
241 |                 medoids = medos)
242 |   }
243 |   class(out) <- "qtcatClust"
244 |   out
245 | }
246 | 
247 | 
248 | #' @title Cut a qtcatClust object
249 | #'
250 | #' @description Cut a qtcatClust object at an specific height.
251 | #'
252 | #' @param snp an object of class \linkS4class{snpMatrix}.
253 | #' @param snpClust an object of class \code{\link{qtcatClust}}.
254 | #' @param absCor a cutting height in absolute value of correlation.
255 | #'
256 | #' @examples
257 | #' # file containing example data for SNP data
258 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
259 | #' snp <- read.snpData(gfile, sep = ",")
260 | #' clust <- qtcatClust(snp)
261 | #'
262 | #' cclust <- cutClust(snp, clust, .5)
263 | #'
264 | #' @importFrom methods is
265 | #' @importFrom stats na.omit
266 | #' @export
267 | cutClust <- function(snp, snpClust, absCor = 1) {
268 |   stopifnot(is(snp, "snpMatrix"))
269 |   stopifnot(is(snpClust, "qtcatClust"))
270 |   stopifnot(!missing(absCor))
271 |   dend <- snpClust$dendrogram
272 |   clust <- snpClust$clusters
273 |   if ((1 - absCor) >= attr(dend, "height")) {
274 |     stop("'absCor' outside of range")
275 |   } else {
276 |     cut.dend  <- cut(dend, h = 1 - absCor)
277 |     clust.member <- function(i, dlist, clust) {
278 |       namesclust <- names(clust)[clust %in% unique(clust[labels(dlist[[i]])])]
279 |       clusti <- rep(i, length(namesclust))
280 |       names(clusti) <- namesclust
281 |       return(clusti)
282 |     }
283 |     clust <- unlist(lapply(1:length(cut.dend$lower), clust.member, cut.dend$lower, clust))
284 |     medo <- names(clust)[corMedoids(snp[, names(clust)]@snpData, clust)]
285 |     dend <- rename.leafs(cut.dend$upper, medo)
286 |     clust <- clust[na.omit(match(colnames(snp), names(clust)))]
287 |   }
288 |   out <- list(dendrogram = dend,
289 |               clusters = clust,
290 |               medoids = medo)
291 |   class(out) <- "qtcatClust"
292 |   out
293 | }
294 | 
295 | 
296 | #' @title Rename dendrogram leafs
297 | #'
298 | #' @description Rename dendrogram leafs.
299 | #'
300 | #' @param dend a dendrogram.
301 | #' @param labels a vector of new names.
302 | #'
303 | #' @importFrom stats dendrapply is.leaf
304 | #' @keywords internal
305 | rename.leafs <- function(dend, labels){
306 |   dendlabel <- function(n) {
307 |     if(is.leaf(n)) {
308 |       i <<- i + 1L
309 |       attr(n, "label") <- labels[i]
310 |     }
311 |     n
312 |   }
313 |   i <- 0L
314 |   dendrapply(dend, dendlabel)
315 | }
316 | 


--------------------------------------------------------------------------------
/R/snpData.R:
--------------------------------------------------------------------------------
  1 | #' @title Read SNP tables as a snpMatrix object
  2 | #'
  3 | #' @description Reads a file in table format and as a \linkS4class{snpMatrix} object.
  4 | #'
  5 | #' @param file the name of the file which the data are to be read from. If it does not
  6 | #' contain an absolute path, the file name is relative to the current working directory,
  7 | #' \code{getwd()}. Tilde-expansion is performed where supported.
  8 | #' @param sep a field separator character. Values on each line of the file are separated
  9 | #' by this character.
 10 | #' @param quote the set of quoting characters. To disable quoting altogether, use
 11 | #' \code{quote = ""}.
 12 | #' @param na.string a string which is  interpreted as NA value.
 13 | #' @param nrows a integer, the maximum number of rows to read.
 14 | #'
 15 | #' @examples
 16 | #' # file containing example data for SNP data
 17 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
 18 | #' snp <- read.snpData(gfile, sep = ",")
 19 | #'
 20 | #' @importFrom methods new
 21 | #' @export
 22 | read.snpData <- function(file, sep = " ",  quote = "\"",
 23 |                          na.string = "NA", nrows = -1L) {
 24 |   if (!file.exists(file))
 25 |     stop("No such file or directory")
 26 |   file <- normalizePath(file)
 27 |   testRead <- strsplit(readLines(file, n = 2L), sep)
 28 |   if (sep != "")
 29 |     testRead <- lapply(testRead, function(x, sep) gsub(sep, '',x), sep = sep)
 30 |   if (length(testRead[[1L]]) <= 3L)
 31 |     stop("In line one the separator character 'sep' doesn't exist")
 32 |   if (length(testRead[[1L]]) != length(testRead[[2L]]))
 33 |     stop("Line one and two are of differnt length")
 34 |   if (!identical(unique(testRead[[1L]]), testRead[[1L]]))
 35 |     stop("Column names are not unique")
 36 |   firstCols <- tolower(substring(testRead[[1L]][1L:3L], 1L, 3L))
 37 |   if (identical(firstCols, c("nam", "chr", "pos"))) {
 38 |     rowNames <- TRUE
 39 |     snp1 <- testRead[[2L]][-1L:-3L]
 40 |   } else if (identical(firstCols[1L:2L], c("chr", "pos"))) {
 41 |     rowNames <- FALSE
 42 |     snp1 <- testRead[[2L]][-1L:-2L]
 43 |   } else {
 44 |     stop("Either the first three columns contain 'names', 'chr', and 'pos' or alternatively the first two columns contain 'chr', and 'pos'")
 45 |   }
 46 |   if (any(nchar(snp1) != 2L))
 47 |     stop("Every position in the SNP-matrix has to be specified by two characters, missing values are not allowed")
 48 |   if (na.string !=  "") {
 49 |     na.string <- as.character(na.string)
 50 |     na.string[is.na(na.string)] <- "NA"
 51 |   }
 52 |   temp <- read_snpData(file, sep, quote, rowNames, na.string, nrows)
 53 |   if (!length(temp$lociNames)) {
 54 |     lociNames <- paste0("loci", seq_len(ncol(temp$snpData)))
 55 |   } else {
 56 |     lociNames <- make.unique(temp$lociNames)
 57 |   }
 58 |   chr <- suppressWarnings(as.numeric(temp$chr))
 59 |   if (any(is.na(chr)))
 60 |     chr <- temp$chr
 61 |   out <- new("snpMatrix",
 62 |              snpData = temp$snpData,
 63 |              snpInfo = data.frame(chr = chr,
 64 |                                    pos = temp$pos,
 65 |                                    allele = t(temp$alleles),
 66 |                                    row.names = lociNames),
 67 |              dim = dim(temp$snpData),
 68 |              dimnames = list(temp$indivNames, lociNames))
 69 |   out
 70 | }
 71 | 
 72 | 
 73 | #' @title A snpMatrix constructor
 74 | #'
 75 | #' @description Constructs a \linkS4class{snpMatrix} object from the given data.
 76 | #'
 77 | #' @param x a matrix with individuals in rows and SNPs in columns.
 78 | #' @param chr a vector with chromosoms at which SNPs are located.
 79 | #' @param pos a vector with genomic positions at which SNPs are located.
 80 | #' @param alleleCoding a coding scheme of \code{x} for hom (AA), het (AB), and hom (BB).
 81 | #' @param allele.1 labels of allele one, for each SNP.
 82 | #' @param allele.2 labels of allele two, for each SNP.
 83 | #'
 84 | #' @importFrom methods new
 85 | #' @importFrom stats na.omit
 86 | #' @export
 87 | as.snpMatrix <- function(x, chr, pos, alleleCoding = c(-1, 0, 1),
 88 |                        allele.1 = NULL, allele.2 = NULL) {
 89 |   if (!is.matrix(x))
 90 |     x <- as.matrix(x)
 91 |   if (is.null(colnames(x)))
 92 |     stop("Column names are missing for 'x'")
 93 |   if (ncol(x) < 2L)
 94 |     stop("'x' has less than two columns")
 95 |   if (missing(chr))
 96 |     stop("'chr' must be specified")
 97 |   if (missing(pos))
 98 |     stop("'pos' must be specified")
 99 |   if (!is.vector(alleleCoding))
100 |     stop("'alleleCoding' is not a vector")
101 |   x.allele <- na.omit(unique(c(x)))
102 |   if (!all(c(x.allele %in% alleleCoding, alleleCoding %in% x.allele)))
103 |     stop("'alleleCoding' do not match to 'x'")
104 |   if (is.null(allele.1) || is.null(allele.2)) {
105 |     allele.1 <- rep("A", ncol(x))
106 |     allele.2 <- rep("B", ncol(x))
107 |   }
108 |   if (is.null(rownames(x))) {
109 |     indiv.names <- paste0("indiv", seq_len(nrow(x)))
110 |   } else {
111 |     indiv.names <- rownames(x)
112 |   }
113 |   loci.names <- colnames(x)
114 |   attr(x, 'dimnames') <- NULL
115 |   nLabels <- length(alleleCoding)
116 |   if (nLabels == 2L) {
117 |     newLabels <- as.raw(c(1, 3))
118 |   } else if (nLabels == 3L) {
119 |     newLabels <- as.raw(c(1, 2, 3))
120 |   }
121 |   y <- matrix(raw(0), nrow(x), ncol(x))
122 |   for (i in 1:nLabels)
123 |     y[alleleCoding[i] == x] <- newLabels[i]
124 |   out <- new("snpMatrix",
125 |              snpData = y,
126 |              snpInfo = data.frame(chr = chr,
127 |                                   pos = pos,
128 |                                   allele.1 = allele.1,
129 |                                   allele.2 = allele.2,
130 |                                   row.names = loci.names),
131 |              dim = dim(y),
132 |              dimnames = list(indiv.names, loci.names))
133 |   out
134 | }
135 | 
136 | 
137 | #' @title Subsetting snpMatrix
138 | #'
139 | #' @description Subsetting an object of class \linkS4class{snpMatrix}.
140 | #'
141 | #' @param x  an object of class \linkS4class{snpMatrix}.
142 | #' @param i a indices specifying elements to extract or replace. Indices are booleans,
143 | #' numeric or character vectors.
144 | #' @param j indices specifying elements to extract or replace. Indices are booleans,
145 | #' numeric or character vectors.
146 | #' @param ... not implemented.
147 | #' @param drop not implemented.
148 | #'
149 | #' @importFrom methods setMethod signature new
150 | #' @export
151 | setMethod("[", signature(x = "snpMatrix", i = "ANY", j = "ANY", drop = "missing"),
152 |           function(x, i, j, ..., drop) {
153 |             if (!missing(i)) {
154 |               if (is.character(i)) {
155 |                 i <- match(i, rownames(x))
156 |               }
157 |             }
158 |             if (!missing(j)) {
159 |               if (is.character(j)) {
160 |                 j <- match(j, colnames(x))
161 |               }
162 |             }
163 |             snpData <- x@snpData[i, j, drop = FALSE]
164 |             out <- new("snpMatrix",
165 |                        snpData = snpData,
166 |                        snpInfo = x@snpInfo[j, ],
167 |                        dim = dim(snpData),
168 |                        dimnames = list(rownames(x)[i], colnames(x)[j]))
169 |             out
170 |           }
171 | )
172 | 
173 | 
174 | #' @title As matrix method for snpMatrix
175 | #'
176 | #' @description As matrix method for an object of class \linkS4class{snpMatrix}.
177 | #'
178 | #' @param x an object of class \linkS4class{snpMatrix}.
179 | #'
180 | #' @examples
181 | #' # file containing example data for SNP data
182 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
183 | #' snp <- read.snpData(gfile, sep = ",")
184 | #' snpmat <- as.matrix(snp)
185 | #'
186 | #' @importFrom methods setMethod signature
187 | #' @export
188 | setMethod("as.matrix", signature(x = "snpMatrix"),
189 |           function(x) {
190 |             out <- design(x@snpData)
191 |             dimnames(out) <- dimnames(x)
192 |             out
193 |           }
194 | )
195 | 
196 | 
197 | #' @title Get position from snpMatrix
198 | #'
199 | #' @description Genetic position info from an object of class
200 | #'  \linkS4class{snpMatrix}.
201 | #'
202 | #' @param object an object of class \linkS4class{snpMatrix}.
203 | #'
204 | #' @examples
205 | #' # file containing example data for SNP data
206 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
207 | #' snp <- read.snpData(gfile, sep = ",")
208 | #' info <- snpInfo(snp)
209 | #'
210 | #' @importFrom methods setMethod signature
211 | #' @export
212 | setMethod("snpInfo", signature(object = "snpMatrix"),
213 |           function(object) {
214 |             out <- object@snpInfo
215 |             if (is.null(out)) {
216 |               cat("No position information available")
217 |             }
218 |             out
219 |           }
220 | )
221 | 
222 | 
223 | #' @title Allele frequency
224 | #'
225 | #' @description Allele frequency an object of class \linkS4class{snpMatrix}.
226 | #'
227 | #' @param x an object of class \linkS4class{snpMatrix}.
228 | #' @param maf logical, if true minor allele frequency (default), other ways allele
229 | #' frequency of 'allele.1'.
230 | #'
231 | #' @examples
232 | #' # file containing example data for SNP data
233 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
234 | #' snp <- read.snpData(gfile, sep = ",")
235 | #' af <- alleleFreq(snp)
236 | #'
237 | #' @importFrom methods setMethod signature
238 | #' @export
239 | setMethod("alleleFreq", signature(x = "snpMatrix"),
240 |           function(x, maf = TRUE) {
241 |             out <- afreq(x@snpData, maf)
242 |             names(out) <- colnames(x)
243 |             out
244 |           }
245 | )
246 | 
247 | 
248 | #' @title Heterozygosity
249 | #'
250 | #' @description Heterozygosity an object of class \linkS4class{snpMatrix}.
251 | #'
252 | #' @param x an object of class \linkS4class{snpMatrix}.
253 | #' @param dim a integer for dimension. 1 (default) is for rows (individuals), 2 is for
254 | #' columns (SNPs).
255 | #'
256 | #' @examples
257 | #' # file containing example data for SNP data
258 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
259 | #' snp <- read.snpData(gfile, sep = ",")
260 | #' hf1 <- hetFreq(snp, 1)
261 | #' hf2 <- hetFreq(snp, 2)
262 | #'
263 | #' @importFrom methods setMethod signature
264 | #' @export
265 | setMethod("hetFreq", signature(x = "snpMatrix"),
266 |           function(x, dim = 1) {
267 |             if (dim != 1L && dim != 2L)
268 |               stop("'dim' must be '1' or '2'")
269 |             out <- hetfreq(x@snpData, dim)
270 |             if (dim == 1L)
271 |               names(out) <- rownames(x)
272 |             else if (dim == 2L)
273 |               names(out) <- colnames(x)
274 |             return(out)
275 |           }
276 | )
277 | 
278 | 
279 | #' @title NA frequency
280 | #'
281 | #' @description NA frequency in an object of class \linkS4class{snpMatrix}.
282 | #'
283 | #' @param x an object of class \linkS4class{snpMatrix}.
284 | #' @param dim a integer for dimension. 1 (default) is for rows (individuals), 2 is for
285 | #' columns (SNPs).
286 | #'
287 | #' @examples
288 | #' # file containing example data for SNP data
289 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
290 | #' snp <- read.snpData(gfile, sep = ",")
291 | #' na1 <- naFreq(snp, 1)
292 | #' na2 <- naFreq(snp, 2)
293 | #'
294 | #' @importFrom methods setMethod signature
295 | #' @export
296 | setMethod("naFreq", signature(x = "snpMatrix"),
297 |           function(x, dim = 1) {
298 |             if (dim != 1L && dim != 2L)
299 |               stop("'dim' must be '1' or '2'")
300 |             out <- nafreq(x@snpData, dim)
301 |             if (dim == 1L)
302 |               names(out) <- rownames(x)
303 |             else if (dim == 2L)
304 |               names(out) <- colnames(x)
305 |             return(out)
306 |           }
307 | )
308 | 


--------------------------------------------------------------------------------
/R/snpImpute.R:
--------------------------------------------------------------------------------
  1 | #' @title Impute allele in formation to SNPs with missing data
  2 | #'
  3 | #' @description Uses neighbor SNPs from the clustering hierarchy to impute alleles to
  4 | #' positions with missing values.
  5 | #'
  6 | #' @param snp an object of class \linkS4class{snpMatrix}.
  7 | #' @param snpClust an object of class \code{\link{qtcatClust}}.
  8 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this
  9 | #' point in the hierarchy is reached, imputing is done via allele frequencies.
 10 | #' @param mc.cores a number of cores for parallelising. Theoretical maximum is
 11 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}.
 12 | #'
 13 | #' @examples
 14 | #' # file containing example data for SNP data
 15 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
 16 | #' snp1 <- read.snpData(gfile, sep = ",")
 17 | #' # delete SNP information from Matrix, 33.33% NAs (-> 66.67% SNP info)
 18 | #' snp2 <- snp1
 19 | #' nainx <- sample(1:length(snp2@snpData), length(snp2@snpData) / 3)
 20 | #' snp2@snpData[nainx] <- as.raw(0)
 21 | #' # clustering
 22 | #' snp2clust <- qtcatClust(snp2)
 23 | #'
 24 | #' # imputing
 25 | #' snp3 <- imputeSnpMatrix(snp2, snp2clust)
 26 | #' # comparison of the full and the imputed data set
 27 | #' snpmat1 <- as.matrix(snp1)
 28 | #' snpmat3 <- as.matrix(snp3)
 29 | #' (1 - sum(abs(snpmat1- snpmat3)) / length(snpmat1)) * 100
 30 | #'
 31 | #' @importFrom hit as.hierarchy
 32 | #' @export
 33 | imputeSnpMatrix <- function(snp, snpClust, min.absCor = .1, mc.cores = 1) {
 34 |   stopifnot(is(snp, "snpMatrix"))
 35 |   stopifnot(is(snpClust, "qtcatClust"))
 36 |   snpnames <- colnames(snp)
 37 |   hier <- as.hierarchy(snpClust$dendrogram, names = snpnames)
 38 |   snp <- imputeMedoids(snp, snpClust, hier, min.absCor, mc.cores)
 39 |   # impute non medoid SNPs (if exist)
 40 |   nonMedo <- which(!(names(snpClust$clusters) %in% snpClust$medoids))
 41 |   if (length(nonMedo)) {
 42 |     flipAlleles <- as.numeric(alleleFreq(snp, FALSE) <= .5)
 43 |     snpList <- list()
 44 |     for (i in 1:ncol(snp)) {
 45 |       snpList[[i]] <- snp@snpData[, i]
 46 |       if (i %in% nonMedo) {
 47 |         m <- which(snpClust$medoids[snpClust$clusters[i]] == snpnames)
 48 |         js <- which(snpList[[i]] == is.raw(0))
 49 |         jAllele <- snp@snpData[js, m]
 50 |         if (flipAlleles[i] != flipAlleles[m]) {
 51 |           j1 <- which(jAllele == as.raw(1))
 52 |           j3 <- which(jAllele == as.raw(3))
 53 |           jAllele[j1] <- as.raw(3)
 54 |           jAllele[j3] <- as.raw(1)
 55 |         }
 56 |         snpList[[i]][js] <- jAllele
 57 |       }
 58 |     }
 59 |     snp@snpData <- do.call(cbind, snpList)
 60 |   }
 61 |   snp
 62 | }
 63 | 
 64 | 
 65 | #' @title Impute missing information at medoid SNPs
 66 | #'
 67 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute alleles to
 68 | #' positions with missing values at medoid SNPs.
 69 | #'
 70 | #' @param snp an object of class \linkS4class{snpMatrix}.
 71 | #' @param snpClust an object of class \code{\link{qtcatClust}}.
 72 | #' @param hier an object of class hierarchy.
 73 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this
 74 | #' point in the hierarchy is reached, imputing is done via allele frequencies.
 75 | #' @param mc.cores a number of cores for parallelising. Theoretical maximum is
 76 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}.
 77 | #'
 78 | #' @importFrom parallel mclapply
 79 | #' @keywords internal
 80 | imputeMedoids <- function(snp, snpClust, hier, min.absCor = .25, mc.cores = 1) {
 81 |   hierLeafs <- which(sapply(hier, function(x) is.null(attr(x, which = "subset"))))
 82 |   naSnps <- which(naFreq(snp, 2) > 0 & colnames(snp) %in% labels(hier))
 83 |   medoSnps <- names(snpClust$clusters) %in% snpClust$medoids
 84 |   flipAlleles <- as.numeric(alleleFreq(snp, FALSE) <= .5)
 85 |   # run thru all SNPs
 86 |   snpList <- mclapply(1:ncol(snp), imputeSnp,
 87 |                       snp, hier, hierLeafs, snpClust$clusters, medoSnps,
 88 |                       naSnps, flipAlleles, min.absCor,
 89 |                       mc.cores = mc.cores)
 90 |   snp@snpData <- do.call(cbind, snpList)
 91 |   snp
 92 | }
 93 | 
 94 | 
 95 | #' @title Impute missing information at a medoid SNPs from a group of neighbors
 96 | #'
 97 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute as many as
 98 | #' possible alleles to positions with missing values at medoid SNPs.
 99 | #'
100 | #' @param inxSnpOfInt a vertor of the snp of interest.
101 | #' @param snp an object of class \linkS4class{snpMatrix}.
102 | #' @param hier an object of class hierarchy.
103 | #' @param hierLeafs a vector of leafs of the hierarchy.
104 | #' @param clust a named vector of clusters.
105 | #' @param medoSnps a vector of medo turue o false.
106 | #' @param naSnps a vector of NA indeces.
107 | #' @param flipAlleles a vertor of telling for each SNP if allele one has allele freq. > 0.5
108 | #' or not.
109 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this
110 | #' point in the hierarchy is reached, imputing is done via allele frequencies.
111 | #'
112 | #' @keywords internal
113 | imputeSnp <- function(inxSnpOfInt, snp, hier, hierLeafs, clust, medoSnps, naSnps,
114 |                       flipAlleles, min.absCor) {
115 |   snpOfInt <- snp@snpData[, inxSnpOfInt]
116 |   if (medoSnps[inxSnpOfInt] && (inxSnpOfInt %in% naSnps)) {
117 |     unsolved <- TRUE
118 |     inxSnpsNotComp <- inxSnpOfInt
119 |     # check in clusters of identicals
120 |     inxSnpGrp <- which(clust == clust[inxSnpOfInt])
121 |     inxSnpsNotComp <- inxSnpGrp[!(inxSnpGrp %in% inxSnpsNotComp)]
122 |     if (length(inxSnpsNotComp)) {
123 |       temp <- imputeSnpIter(snp, snpOfInt, inxSnpsNotComp,
124 |                         flipAlleles[inxSnpOfInt], flipAlleles)
125 |       snpOfInt <- temp[[1L]]
126 |       unsolved <- temp[[2L]]
127 |     }
128 |     if (unsolved) {
129 |       # run thru the heirarchy until NAs of the SNP are filled with information or the
130 |       # height threshold is reached
131 |       hierSnpOfInt <- hierLeafs[sapply(hier[hierLeafs], function(x) any(x == inxSnpOfInt))]
132 |       super <- attr(hier[[hierSnpOfInt]], "superset")
133 |       inxSnpGrp <- hier[[super]]
134 |       h <- attr(inxSnpGrp, "height")
135 |       while (unsolved && h <= (1 - min.absCor)) {
136 |         inxSnpsNotComp <- c(inxSnpsNotComp, inxSnpsNotComp)
137 |         inxSnpsNotComp <- inxSnpGrp[!(inxSnpGrp %in% inxSnpsNotComp)]
138 |         if (length(inxSnpsNotComp)) {
139 |           temp <- imputeSnpIter(snp, snpOfInt, inxSnpsNotComp,
140 |                             flipAlleles[inxSnpOfInt], flipAlleles)
141 |           snpOfInt <- temp[[1L]]
142 |           unsolved <- temp[[2L]]
143 |         }
144 |         super <- attr(hier[[super]], "superset")
145 |         if (is.null(super))
146 |           break
147 |         inxSnpGrp <- hier[[super]]
148 |         h <- attr(inxSnpGrp, "height")
149 |       }
150 |     }
151 |     # if height threshold is reached use alle frequency for random imputing
152 |     if (unsolved) {
153 |       js <- which(snpOfInt == as.raw(0L))
154 |       alleleNo <- table(as.integer(snpOfInt), exclude = 0L)
155 |       alleles <- as.raw(names(alleleNo))
156 |       prob <- alleleNo / sum(alleleNo)
157 |       snpOfInt[js] <- sample(alleles, length(js), TRUE, prob)
158 |     }
159 |   }
160 |   snpOfInt
161 | }
162 | 
163 | 
164 | #' @title Impute missing information at a medoid SNPs from a group of neighbors
165 | #'
166 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute as many as
167 | #' possible alleles to positions with missing values at medoid SNPs.
168 | #'
169 | #' @param snp an object of class \linkS4class{snpMatrix}.
170 | #' @param snpOfInt a vertor of the snp of interest.
171 | #' @param inxSnpsToComp a index of neighbors.
172 | #' @param snpOfIntFlip flip status of the snp of interest.
173 | #' @param flipAlleles a vertor of telling for each SNP if allele one has allele freq. > 0.5
174 | #' or not.
175 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this
176 | #' point in the hierarchy is reached, imputing is done via allele frequencies.
177 | #'
178 | #' @keywords internal
179 | imputeSnpIter <- function(snp, snpOfInt, inxSnpsToComp, snpOfIntFlip, flipAlleles) {
180 |   unsolved <- TRUE
181 |   n <- length(inxSnpsToComp)
182 |   i <- 1L
183 |   while (unsolved && i <= n) {
184 |     js <- which(snpOfInt == as.raw(0L))
185 |     jAllele <- snp@snpData[js, inxSnpsToComp[i]]
186 |     if (snpOfIntFlip != flipAlleles[inxSnpsToComp[i]]) {
187 |       j1 <- which(jAllele == as.raw(1L))
188 |       j3 <- which(jAllele == as.raw(3L))
189 |       jAllele[j1] <- as.raw(3L)
190 |       jAllele[j3] <- as.raw(1L)
191 |     }
192 |     snpOfInt[js] <- jAllele
193 |     unsolved <- any(jAllele == as.raw(0L))
194 |     i <- i + 1L
195 |   }
196 |   list(snpOfInt, unsolved)
197 | }
198 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <a name="logo"/>
 2 | <div align="center">
 3 | <a href="https://github.com/QTCAT" target="_blank">
 4 | <img src="https://github.com/QTCAT/qtcat_logo/blob/master/logo/QTCAT_l.png?raw=true" width="600"></img>
 5 | </a>
 6 | </div>
 7 | 
 8 | --------------------------------------------------------------------------------
 9 | 
10 | # Quantitative Trait Cluster Association Test
11 | This project has no active development and is unmaintained.
12 | 
13 | 
14 | ## Description:
15 | All SNPs are jointly associated to the phenotype and at the same time correlation among 
16 | them is considered. Thus, correction for population structure becomes unnecessary, which in 
17 | many cases results in a power advantages compared to other methods.
18 | 
19 | **Klasen, J. R. et al. (2016)**. *A multi-marker association method for genome-wide 
20 | association studies without the need for population structure correction*. Nature 
21 | Communications. [Paper](http://www.nature.com/articles/ncomms13299)
22 | 
23 | ## Installation:
24 | The package can be installed from an R console via [`devtools`](https://github.com/hadley/devtools#updating-to-the-latest-version-of-devtools)
25 | (If you haven't yet installed `devtools` please do so first). 
26 | 
27 | ```R
28 | # install.packages("devtools")
29 | devtools::install_github("QTCAT/qtcat") 
30 | 
31 | ```
32 | 
33 | ## Example:
34 | The `qtcatQtc`-function example gives an overview of the functionality of 
35 | the package and can be accessed once the package is loaded.
36 | 
37 | ```R
38 | library(qtcat)
39 | example(qtcatQtc, run.dontrun = TRUE)
40 | 
41 | ```
42 | The data used in the example can be found under [inst/extdata](https://github.com/QTCAT/qtcat/tree/master/inst/extdata) 
43 | 
44 | There is also a [Arabidopsis example](https://github.com/QTCAT/qtcat.data) available.
45 | 
46 | --------------------------------------------------------------------------------
47 | 
48 | [![License](https://img.shields.io/badge/license-GPL%20%28%3E=%202%29-brightgreen.svg)](https://www.gnu.org/licenses/gpl-2.0.html)
49 | &copy; 2016 JR Klasen
50 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | # DO NOT CHANGE the "init" and "install" sections below
 2 | 
 3 | # Download script file from GitHub
 4 | init:
 5 |   ps: |
 6 |         $ErrorActionPreference = "Stop"
 7 |         Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
 8 |         Import-Module '..\appveyor-tool.ps1'
 9 | 
10 | install:
11 |   ps: Bootstrap
12 | 
13 | # Adapt as necessary starting from here
14 | 
15 | environment:
16 |   global:
17 |     WARNINGS_ARE_ERRORS: 1
18 | 
19 |   matrix:
20 |   - R_VERSION: release
21 |     R_ARCH: x64
22 | 
23 |   - R_VERSION: patched
24 | 
25 | build_script:
26 |   - travis-tool.sh install_deps
27 |   - travis-tool.sh install_github QTCAT/hit
28 | 
29 | test_script:
30 |   - travis-tool.sh run_tests
31 | 
32 | on_failure:
33 |   - 7z a failure.zip *.Rcheck\*
34 |   - appveyor PushArtifact failure.zip
35 | 
36 | artifacts:
37 |   - path: '*.Rcheck\**\*.log'
38 |     name: Logs
39 | 
40 |   - path: '*.Rcheck\**\*.out'
41 |     name: Logs
42 | 
43 |   - path: '*.Rcheck\**\*.fail'
44 |     name: Logs
45 | 
46 |   - path: '*.Rcheck\**\*.Rout'
47 |     name: Logs
48 | 
49 |   - path: '\*_*.tar.gz'
50 |     name: Bits
51 | 
52 |   - path: '\*_*.zip'
53 |     name: Bits
54 | 


--------------------------------------------------------------------------------
/inst/extdata/phenodata.csv:
--------------------------------------------------------------------------------
  1 | name,phenotype,replication
  2 | Indiv1,-0.712864882314292,I
  3 | Indiv2,4.05700145305166,I
  4 | Indiv3,2.68457752825909,I
  5 | Indiv4,-0.978722003892421,I
  6 | Indiv5,2.11833460559996,I
  7 | Indiv6,4.69596379863478,I
  8 | Indiv7,1.81492380500098,I
  9 | Indiv8,8.33973545250998,I
 10 | Indiv9,3.21727105571782,I
 11 | Indiv10,1.75615681901381,I
 12 | Indiv11,2.20340927227642,I
 13 | Indiv12,3.29008788696637,I
 14 | Indiv13,5.22242372780827,I
 15 | Indiv14,3.9616977925181,I
 16 | Indiv15,6.4184630775582,I
 17 | Indiv16,2.52617864225462,I
 18 | Indiv17,2.71675134747193,I
 19 | Indiv18,9.08630499778771,I
 20 | Indiv19,3.01776165508853,I
 21 | Indiv20,2.12967794232666,I
 22 | Indiv21,6.19881599227966,I
 23 | Indiv22,2.60912725791431,I
 24 | Indiv23,6.74137903134634,I
 25 | Indiv24,5.43854561008488,I
 26 | Indiv25,5.63126674897179,I
 27 | Indiv26,5.3700172742071,I
 28 | Indiv27,2.97767963958376,I
 29 | Indiv28,5.02236976056541,I
 30 | Indiv29,8.73017558942648,I
 31 | Indiv30,7.67488336483054,I
 32 | Indiv31,4.41931825555875,I
 33 | Indiv32,5.76329762287536,I
 34 | Indiv33,5.69297223264722,I
 35 | Indiv34,4.43795994668944,I
 36 | Indiv35,0.546417450296849,I
 37 | Indiv36,5.14011399361257,I
 38 | Indiv37,5.06931125177772,I
 39 | Indiv38,2.6091396053827,I
 40 | Indiv39,5.27906736993309,I
 41 | Indiv40,7.59780299611099,I
 42 | Indiv41,3.6971545640507,I
 43 | Indiv42,4.30324604452561,I
 44 | Indiv43,3.59785828183773,I
 45 | Indiv44,5.09832009873271,I
 46 | Indiv45,3.76780907857732,I
 47 | Indiv46,0.844484552582176,I
 48 | Indiv47,4.74916534660669,I
 49 | Indiv48,5.67195293439532,I
 50 | Indiv49,2.93964234735589,I
 51 | Indiv50,4.90681722956387,I
 52 | Indiv51,3.74911685226859,I
 53 | Indiv52,1.92666742002455,I
 54 | Indiv53,4.60404498058726,I
 55 | Indiv54,3.53578869920731,I
 56 | Indiv55,4.36984245698348,I
 57 | Indiv56,3.26681032690425,I
 58 | Indiv57,4.18302568636384,I
 59 | Indiv58,3.70971187197379,I
 60 | Indiv59,2.27999270087654,I
 61 | Indiv60,3.05181399805921,I
 62 | Indiv61,2.43549816911518,I
 63 | Indiv62,2.34800409902198,I
 64 | Indiv63,5.31821730546112,I
 65 | Indiv64,-0.713122051020195,I
 66 | Indiv65,4.50338394253097,I
 67 | Indiv66,4.62922637029452,I
 68 | Indiv67,2.24256710689108,I
 69 | Indiv68,3.59909039731827,I
 70 | Indiv69,6.20966957535184,I
 71 | Indiv70,4.00384637556956,I
 72 | Indiv71,2.6824335168794,I
 73 | Indiv72,6.83166810179122,I
 74 | Indiv73,2.49389386433214,I
 75 | Indiv74,2.96109283271708,I
 76 | Indiv75,5.90123212589518,I
 77 | Indiv76,4.42447675875507,I
 78 | Indiv77,4.96833384873987,I
 79 | Indiv78,7.12738639539622,I
 80 | Indiv79,4.42632757597075,I
 81 | Indiv80,5.27826128836297,I
 82 | Indiv81,4.2832569668242,I
 83 | Indiv82,2.35271359835162,I
 84 | Indiv83,6.75091008120157,I
 85 | Indiv84,3.39601817943395,I
 86 | Indiv85,0.792062124318047,I
 87 | Indiv86,4.49112716920309,I
 88 | Indiv87,5.48080731018814,I
 89 | Indiv88,5.53831911353581,I
 90 | Indiv89,3.8073404958687,I
 91 | Indiv90,8.11792267567341,I
 92 | Indiv91,5.24450049724668,I
 93 | Indiv92,8.06636067197744,I
 94 | Indiv93,3.76240790598956,I
 95 | Indiv94,4.57106970441781,I
 96 | Indiv95,0.930562501758845,I
 97 | Indiv96,5.89207888898974,I
 98 | Indiv97,4.76510770162231,I
 99 | Indiv98,6.13315220689909,I
100 | Indiv99,5.15975639021241,I
101 | Indiv100,4.97858970802809,I
102 | Indiv1,0.176206906929316,II
103 | Indiv2,3.85129791007974,II
104 | Indiv3,0.89679511088741,II
105 | Indiv4,3.50501419315986,II
106 | Indiv5,2.74144800827057,II
107 | Indiv6,2.89870296149524,II
108 | Indiv7,2.83813356575242,II
109 | Indiv8,6.69393926686158,II
110 | Indiv9,0.445190879937969,II
111 | Indiv10,1.81268165056905,II
112 | Indiv11,4.18471268209546,II
113 | Indiv12,3.54037632849923,II
114 | Indiv13,7.54041530100953,II
115 | Indiv14,5.86490355924926,II
116 | Indiv15,0.541745439242708,II
117 | Indiv16,2.69319603761256,II
118 | Indiv17,4.212108002269,II
119 | Indiv18,5.48944504498334,II
120 | Indiv19,4.55532775386045,II
121 | Indiv20,4.42315823029467,II
122 | Indiv21,4.20476815196306,II
123 | Indiv22,7.19077440353425,II
124 | Indiv23,7.85927666811885,II
125 | Indiv24,4.7649811402712,II
126 | Indiv25,5.33833106226095,II
127 | Indiv26,9.51369408166689,II
128 | Indiv27,2.92203513544632,II
129 | Indiv28,6.32725491154529,II
130 | Indiv29,6.95849521528492,II
131 | Indiv30,4.49056269636047,II
132 | Indiv31,4.64224668507529,II
133 | Indiv32,6.49903099179706,II
134 | Indiv33,2.76900451424559,II
135 | Indiv34,5.10198462028238,II
136 | Indiv35,1.33235508902282,II
137 | Indiv36,3.99616006339745,II
138 | Indiv37,6.9482881756113,II
139 | Indiv38,2.21201425259469,II
140 | Indiv39,3.30168447239249,II
141 | Indiv40,5.70770160656621,II
142 | Indiv41,5.51655628789299,II
143 | Indiv42,3.22099848037914,II
144 | Indiv43,6.83821240540428,II
145 | Indiv44,1.05611152692071,II
146 | Indiv45,2.3793891845802,II
147 | Indiv46,3.486717253706,II
148 | Indiv47,4.45135169448376,II
149 | Indiv48,1.70633725047442,II
150 | Indiv49,7.78933321655033,II
151 | Indiv50,5.93711762234058,II
152 | Indiv51,2.63282544384585,II
153 | Indiv52,5.12336178663091,II
154 | Indiv53,3.13670189353915,II
155 | Indiv54,0.575601823259657,II
156 | Indiv55,5.3577809898076,II
157 | Indiv56,3.64994372110131,II
158 | Indiv57,1.95200830822741,II
159 | Indiv58,2.20290632954696,II
160 | Indiv59,3.98471425274528,II
161 | Indiv60,1.46737912491768,II
162 | Indiv61,2.92074054315639,II
163 | Indiv62,3.92929791664308,II
164 | Indiv63,2.19530471281698,II
165 | Indiv64,1.13804915005779,II
166 | Indiv65,4.15958191761742,II
167 | Indiv66,3.99613481122336,II
168 | Indiv67,1.52917842389308,II
169 | Indiv68,3.82975810221317,II
170 | Indiv69,2.06278495341624,II
171 | Indiv70,5.73101019063431,II
172 | Indiv71,5.43947438490607,II
173 | Indiv72,3.03603922924573,II
174 | Indiv73,2.288226558559,II
175 | Indiv74,2.09090076017277,II
176 | Indiv75,5.00102930990027,II
177 | Indiv76,5.97477767911573,II
178 | Indiv77,5.70601446968208,II
179 | Indiv78,5.02323274457706,II
180 | Indiv79,5.44894123457085,II
181 | Indiv80,5.43591657545378,II
182 | Indiv81,2.51784481532873,II
183 | Indiv82,3.10671706906678,II
184 | Indiv83,8.72487282279824,II
185 | Indiv84,1.27498342463781,II
186 | Indiv85,3.7876125919506,II
187 | Indiv86,5.12474566303836,II
188 | Indiv87,5.2718274186796,II
189 | Indiv88,7.85916592955344,II
190 | Indiv89,4.91608517979929,II
191 | Indiv90,5.01900455357883,II
192 | Indiv91,4.07100812407142,II
193 | Indiv92,6.6107211538395,II
194 | Indiv93,3.51727716642754,II
195 | Indiv94,4.1753329858646,II
196 | Indiv95,3.0353098664466,II
197 | Indiv96,3.32270339015034,II
198 | Indiv97,6.26606071672542,II
199 | Indiv98,7.52163666481343,II
200 | Indiv99,3.20389084994409,II
201 | Indiv100,5.63788046229403,II
202 | Indiv1,2.02029439261033,III
203 | Indiv2,1.3310022466908,III
204 | Indiv3,1.88109268308407,III
205 | Indiv4,2.0747606653117,III
206 | Indiv5,0.71136509236296,III
207 | Indiv6,5.69396750256412,III
208 | Indiv7,3.88933752587519,III
209 | Indiv8,5.40513402423043,III
210 | Indiv9,2.9082007446537,III
211 | Indiv10,5.01449395435981,III
212 | Indiv11,0.404922306483191,III
213 | Indiv12,5.16241985621497,III
214 | Indiv13,6.30167091059181,III
215 | Indiv14,3.02761398080251,III
216 | Indiv15,4.78362934229522,III
217 | Indiv16,2.62219124504208,III
218 | Indiv17,2.20120211280138,III
219 | Indiv18,7.81735976730744,III
220 | Indiv19,5.47309512628714,III
221 | Indiv20,0.326478522987693,III
222 | Indiv21,4.17417013688955,III
223 | Indiv22,4.6634139839633,III
224 | Indiv23,5.11293773267319,III
225 | Indiv24,3.83476050228082,III
226 | Indiv25,6.10600197974025,III
227 | Indiv26,4.88123830253315,III
228 | Indiv27,5.93398647240647,III
229 | Indiv28,6.79058601815249,III
230 | Indiv29,4.08974418429866,III
231 | Indiv30,6.36966316478642,III
232 | Indiv31,6.77187881311789,III
233 | Indiv32,4.2356086186249,III
234 | Indiv33,1.89829225573216,III
235 | Indiv34,8.16035874246949,III
236 | Indiv35,-0.56002492101901,III
237 | Indiv36,4.5986854641122,III
238 | Indiv37,6.90187522768069,III
239 | Indiv38,0.209604370609421,III
240 | Indiv39,4.77510809790155,III
241 | Indiv40,8.50368809488841,III
242 | Indiv41,3.08657316770405,III
243 | Indiv42,3.52485436085774,III
244 | Indiv43,5.98808046655028,III
245 | Indiv44,1.16351360622373,III
246 | Indiv45,7.69353448228304,III
247 | Indiv46,4.93097609274219,III
248 | Indiv47,3.15220320609129,III
249 | Indiv48,2.68712804101995,III
250 | Indiv49,7.71421329580518,III
251 | Indiv50,2.87368671700662,III
252 | Indiv51,3.94179831275021,III
253 | Indiv52,5.10124526695668,III
254 | Indiv53,2.92799161101486,III
255 | Indiv54,0.96951652051614,III
256 | Indiv55,6.14384764255529,III
257 | Indiv56,2.73228444251163,III
258 | Indiv57,4.41322465728952,III
259 | Indiv58,6.15032268108312,III
260 | Indiv59,0.877839915335282,III
261 | Indiv60,2.92317102138906,III
262 | Indiv61,1.94411888999608,III
263 | Indiv62,0.188646943059274,III
264 | Indiv63,3.99239953854629,III
265 | Indiv64,4.5826438337587,III
266 | Indiv65,2.52512157516696,III
267 | Indiv66,7.04756283086153,III
268 | Indiv67,3.35134198261861,III
269 | Indiv68,3.26565145307141,III
270 | Indiv69,4.13587004514256,III
271 | Indiv70,6.50598959703055,III
272 | Indiv71,1.21868886446736,III
273 | Indiv72,3.63896545676015,III
274 | Indiv73,4.93741312391645,III
275 | Indiv74,-1.87541241257004,III
276 | Indiv75,3.31873034768445,III
277 | Indiv76,4.52245145391965,III
278 | Indiv77,5.11374257847258,III
279 | Indiv78,5.48074339885391,III
280 | Indiv79,6.41669576635036,III
281 | Indiv80,2.89227943325531,III
282 | Indiv81,4.59381627240809,III
283 | Indiv82,5.33877112921947,III
284 | Indiv83,5.36755681458729,III
285 | Indiv84,2.69754684643688,III
286 | Indiv85,1.13013661082002,III
287 | Indiv86,5.16369038077365,III
288 | Indiv87,3.88016289688369,III
289 | Indiv88,7.60604371237198,III
290 | Indiv89,1.74340317670462,III
291 | Indiv90,7.43364608481622,III
292 | Indiv91,5.57022305368588,III
293 | Indiv92,4.62469210352858,III
294 | Indiv93,4.84210640577904,III
295 | Indiv94,3.54461123156264,III
296 | Indiv95,-0.0820107650207224,III
297 | Indiv96,5.4477657365215,III
298 | Indiv97,7.78709432577761,III
299 | Indiv98,5.84336889734014,III
300 | Indiv99,4.62500167547687,III
301 | Indiv100,4.9071448387676,III
302 | 


--------------------------------------------------------------------------------
/man/alleleFreq-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpData.R
 3 | \docType{methods}
 4 | \name{alleleFreq,snpMatrix-method}
 5 | \alias{alleleFreq,snpMatrix-method}
 6 | \title{Allele frequency}
 7 | \usage{
 8 | \S4method{alleleFreq}{snpMatrix}(x, maf = TRUE)
 9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 | 
13 | \item{maf}{logical, if true minor allele frequency (default), other ways allele
14 | frequency of 'allele.1'.}
15 | }
16 | \description{
17 | Allele frequency an object of class \linkS4class{snpMatrix}.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | af <- alleleFreq(snp)
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/man/alleleFreq.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGeneric.R
 3 | \name{alleleFreq}
 4 | \alias{alleleFreq}
 5 | \title{Allele Frequency.}
 6 | \usage{
 7 | alleleFreq(x, maf = TRUE)
 8 | }
 9 | \arguments{
10 | \item{x}{an object, for which a corresponding method exists.}
11 | 
12 | \item{maf}{logical, if true minor allele frequency (default), other ways allele
13 | frequency of 'allele.1'.}
14 | }
15 | \description{
16 | Frequency of alleles in data set.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/as.matrix-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpData.R
 3 | \docType{methods}
 4 | \name{as.matrix,snpMatrix-method}
 5 | \alias{as.matrix,snpMatrix-method}
 6 | \title{As matrix method for snpMatrix}
 7 | \usage{
 8 | \S4method{as.matrix}{snpMatrix}(x)
 9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 | }
13 | \description{
14 | As matrix method for an object of class \linkS4class{snpMatrix}.
15 | }
16 | \examples{
17 | # file containing example data for SNP data
18 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
19 | snp <- read.snpData(gfile, sep = ",")
20 | snpmat <- as.matrix(snp)
21 | 
22 | }
23 | 


--------------------------------------------------------------------------------
/man/as.snpMatrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpData.R
 3 | \name{as.snpMatrix}
 4 | \alias{as.snpMatrix}
 5 | \title{A snpMatrix constructor}
 6 | \usage{
 7 | as.snpMatrix(x, chr, pos, alleleCoding = c(-1, 0, 1), allele.1 = NULL,
 8 |   allele.2 = NULL)
 9 | }
10 | \arguments{
11 | \item{x}{a matrix with individuals in rows and SNPs in columns.}
12 | 
13 | \item{chr}{a vector with chromosoms at which SNPs are located.}
14 | 
15 | \item{pos}{a vector with genomic positions at which SNPs are located.}
16 | 
17 | \item{alleleCoding}{a coding scheme of \code{x} for hom (AA), het (AB), and hom (BB).}
18 | 
19 | \item{allele.1}{labels of allele one, for each SNP.}
20 | 
21 | \item{allele.2}{labels of allele two, for each SNP.}
22 | }
23 | \description{
24 | Constructs a \linkS4class{snpMatrix} object from the given data.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/clarans.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpCluster.R
 3 | \name{clarans}
 4 | \alias{clarans}
 5 | \title{K-medoids clustering of SNPs using randomized search}
 6 | \usage{
 7 | clarans(snp, k, maxNeigbours = 100, nLocal = 10, mc.cores = 1)
 8 | }
 9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 | 
12 | \item{k}{a positive integer specifying the number of clusters, has to be greater than
13 | one and less than the number of SNPs.}
14 | 
15 | \item{maxNeigbours}{a positive integer specifying the maximum number of randomized
16 | searches.}
17 | 
18 | \item{nLocal}{a positive integer specifying the number of optimisation runs.}
19 | 
20 | \item{mc.cores}{a positive integer for the number of cores for parallel computing. See
21 | \code{\link[parallel]{mclapply}} for details.}
22 | }
23 | \description{
24 | Partitioning (clustering) into k clusters "around medoids" by randomized
25 | search. \code{1-abs(cor)} is used as distance between SNPs.
26 | }
27 | \details{
28 | The K-medoids clustering is implemented as clustering large applications based
29 | upon randomized search (CLARANS) algorithm (Ng and Han 2002). CLARANS is a modification
30 | of the partitioning around medoids (PAM) algorithm \code{\link[cluster]{pam}}. Where the
31 | PAM algorithm is estimating all distances between SNPs and the respective medoids,
32 | CLARANS is searching a random subset of the SNPs. This is independently repeated several
33 | times and the result which minimises the average distance the most is reported. This
34 | produces results close to those of the PAM algorithm (Ng and Han 2002), though the
35 | number of runs and the subset size have to be arbitrarily chosen by the user. The
36 | algorithm has two advantages: (i) the number of distance comparisons is dramatically
37 | reduced; and (ii) parallelizing is straightforward.
38 | }
39 | \examples{
40 | # file containing example data for SNP data
41 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
42 | snp <- read.snpData(gfile, sep = ",")
43 | 
44 | clust <- clarans(snp, 3)
45 | 
46 | }
47 | \references{
48 | Ng and J. Han (2002). CLARANS: A method for clustering objects for spatial
49 | data mining. \emph{IEEE Transactions on Knowledge and Data Engineering}.
50 | \url{http://dx.doi.org/10.1109/TKDE.2002.1033770}).
51 | }
52 | 


--------------------------------------------------------------------------------
/man/cutClust.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpCluster.R
 3 | \name{cutClust}
 4 | \alias{cutClust}
 5 | \title{Cut a qtcatClust object}
 6 | \usage{
 7 | cutClust(snp, snpClust, absCor = 1)
 8 | }
 9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 | 
12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.}
13 | 
14 | \item{absCor}{a cutting height in absolute value of correlation.}
15 | }
16 | \description{
17 | Cut a qtcatClust object at an specific height.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | clust <- qtcatClust(snp)
24 | 
25 | cclust <- cutClust(snp, clust, .5)
26 | 
27 | }
28 | 


--------------------------------------------------------------------------------
/man/distCor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpCluster.R
 3 | \name{distCor}
 4 | \alias{distCor}
 5 | \title{Correlation based distance between SNPs}
 6 | \usage{
 7 | distCor(snp)
 8 | }
 9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 | }
12 | \description{
13 | This function computes a distance matrix. The distance is estimated
14 | as one minus the absolute value of the correlation coefficient \code{1 - abs(cor)}.
15 | }
16 | \details{
17 | See \code{\link[stats]{dist}} for details about the output object.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | 
24 | dist <- distCor(snp[, 1:10])
25 | 
26 | }
27 | \seealso{
28 | \code{\link[stats]{dist}}
29 | }
30 | 


--------------------------------------------------------------------------------
/man/hetFreq-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpData.R
 3 | \docType{methods}
 4 | \name{hetFreq,snpMatrix-method}
 5 | \alias{hetFreq,snpMatrix-method}
 6 | \title{Heterozygosity}
 7 | \usage{
 8 | \S4method{hetFreq}{snpMatrix}(x, dim = 1)
 9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 | 
13 | \item{dim}{a integer for dimension. 1 (default) is for rows (individuals), 2 is for
14 | columns (SNPs).}
15 | }
16 | \description{
17 | Heterozygosity an object of class \linkS4class{snpMatrix}.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | hf1 <- hetFreq(snp, 1)
24 | hf2 <- hetFreq(snp, 2)
25 | 
26 | }
27 | 


--------------------------------------------------------------------------------
/man/hetFreq.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGeneric.R
 3 | \name{hetFreq}
 4 | \alias{hetFreq}
 5 | \title{Heterozygosity Frequency.}
 6 | \usage{
 7 | hetFreq(x, dim = 1L)
 8 | }
 9 | \arguments{
10 | \item{x}{an object, for which a corresponding method exists.}
11 | 
12 | \item{dim}{dimension over which heterozygosity is estimated. 1 (default) is for rows
13 | (individuals), 2 is for columns (SNPs).}
14 | }
15 | \description{
16 | Frequency of heterozygosity in data set.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/identicals.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpCluster.R
 3 | \name{identicals}
 4 | \alias{identicals}
 5 | \title{Perfect simiarity clusters of SNP}
 6 | \usage{
 7 | identicals(snp, mc.cores = 1)
 8 | }
 9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 | 
12 | \item{mc.cores}{a positive integer for the number of cores for parallel computing. See
13 | \code{\link[parallel]{mclapply}} for details.}
14 | }
15 | \description{
16 | Finds perfect similarity cluster of SNPs. This is specially usfull in
17 | artificial crossing populations.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | 
24 | ident <- identicals(snp)
25 | 
26 | }
27 | 


--------------------------------------------------------------------------------
/man/imputeMedoids.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpImpute.R
 3 | \name{imputeMedoids}
 4 | \alias{imputeMedoids}
 5 | \title{Impute missing information at medoid SNPs}
 6 | \usage{
 7 | imputeMedoids(snp, snpClust, hier, min.absCor = 0.25, mc.cores = 1)
 8 | }
 9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 | 
12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.}
13 | 
14 | \item{hier}{an object of class hierarchy.}
15 | 
16 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this
17 | point in the hierarchy is reached, imputing is done via allele frequencies.}
18 | 
19 | \item{mc.cores}{a number of cores for parallelising. Theoretical maximum is
20 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.}
21 | }
22 | \description{
23 | Uses neighboring SNPs in the clustering hierarchy to impute alleles to
24 | positions with missing values at medoid SNPs.
25 | }
26 | \keyword{internal}
27 | 


--------------------------------------------------------------------------------
/man/imputeSnp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpImpute.R
 3 | \name{imputeSnp}
 4 | \alias{imputeSnp}
 5 | \title{Impute missing information at a medoid SNPs from a group of neighbors}
 6 | \usage{
 7 | imputeSnp(inxSnpOfInt, snp, hier, hierLeafs, clust, medoSnps, naSnps,
 8 |   flipAlleles, min.absCor)
 9 | }
10 | \arguments{
11 | \item{inxSnpOfInt}{a vertor of the snp of interest.}
12 | 
13 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
14 | 
15 | \item{hier}{an object of class hierarchy.}
16 | 
17 | \item{hierLeafs}{a vector of leafs of the hierarchy.}
18 | 
19 | \item{clust}{a named vector of clusters.}
20 | 
21 | \item{medoSnps}{a vector of medo turue o false.}
22 | 
23 | \item{naSnps}{a vector of NA indeces.}
24 | 
25 | \item{flipAlleles}{a vertor of telling for each SNP if allele one has allele freq. > 0.5
26 | or not.}
27 | 
28 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this
29 | point in the hierarchy is reached, imputing is done via allele frequencies.}
30 | }
31 | \description{
32 | Uses neighboring SNPs in the clustering hierarchy to impute as many as
33 | possible alleles to positions with missing values at medoid SNPs.
34 | }
35 | \keyword{internal}
36 | 


--------------------------------------------------------------------------------
/man/imputeSnpIter.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpImpute.R
 3 | \name{imputeSnpIter}
 4 | \alias{imputeSnpIter}
 5 | \title{Impute missing information at a medoid SNPs from a group of neighbors}
 6 | \usage{
 7 | imputeSnpIter(snp, snpOfInt, inxSnpsToComp, snpOfIntFlip, flipAlleles)
 8 | }
 9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 | 
12 | \item{snpOfInt}{a vertor of the snp of interest.}
13 | 
14 | \item{inxSnpsToComp}{a index of neighbors.}
15 | 
16 | \item{snpOfIntFlip}{flip status of the snp of interest.}
17 | 
18 | \item{flipAlleles}{a vertor of telling for each SNP if allele one has allele freq. > 0.5
19 | or not.}
20 | 
21 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this
22 | point in the hierarchy is reached, imputing is done via allele frequencies.}
23 | }
24 | \description{
25 | Uses neighboring SNPs in the clustering hierarchy to impute as many as
26 | possible alleles to positions with missing values at medoid SNPs.
27 | }
28 | \keyword{internal}
29 | 


--------------------------------------------------------------------------------
/man/imputeSnpMatrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpImpute.R
 3 | \name{imputeSnpMatrix}
 4 | \alias{imputeSnpMatrix}
 5 | \title{Impute allele in formation to SNPs with missing data}
 6 | \usage{
 7 | imputeSnpMatrix(snp, snpClust, min.absCor = 0.1, mc.cores = 1)
 8 | }
 9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 | 
12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.}
13 | 
14 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this
15 | point in the hierarchy is reached, imputing is done via allele frequencies.}
16 | 
17 | \item{mc.cores}{a number of cores for parallelising. Theoretical maximum is
18 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.}
19 | }
20 | \description{
21 | Uses neighbor SNPs from the clustering hierarchy to impute alleles to
22 | positions with missing values.
23 | }
24 | \examples{
25 | # file containing example data for SNP data
26 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
27 | snp1 <- read.snpData(gfile, sep = ",")
28 | # delete SNP information from Matrix, 33.33\% NAs (-> 66.67\% SNP info)
29 | snp2 <- snp1
30 | nainx <- sample(1:length(snp2@snpData), length(snp2@snpData) / 3)
31 | snp2@snpData[nainx] <- as.raw(0)
32 | # clustering
33 | snp2clust <- qtcatClust(snp2)
34 | 
35 | # imputing
36 | snp3 <- imputeSnpMatrix(snp2, snp2clust)
37 | # comparison of the full and the imputed data set
38 | snpmat1 <- as.matrix(snp1)
39 | snpmat3 <- as.matrix(snp3)
40 | (1 - sum(abs(snpmat1- snpmat3)) / length(snpmat1)) * 100
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/man/lmQtc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \name{lmQtc}
 4 | \alias{lmQtc}
 5 | \title{Fitting a Linear Model to QTCs}
 6 | \usage{
 7 | lmQtc(object, pheno, geno, alpha = 0.05, min.absCor = 0.05)
 8 | }
 9 | \arguments{
10 | \item{object}{an object of class \code{\link{qtcatHit}}.}
11 | 
12 | \item{pheno}{an object of class \code{\link{qtcatPheno}}.}
13 | 
14 | \item{geno}{an object of class \code{\link{qtcatGeno}}.}
15 | 
16 | \item{alpha}{an alpha level for significance estimation.}
17 | 
18 | \item{min.absCor}{minimum absolute value of correlation considered.}
19 | }
20 | \description{
21 | Linear model between phenotype and medoids of QTCs (significant SNP
22 | clusters).
23 | }
24 | \examples{
25 | # If you want to run the examples, use:
26 | # example(lmQtc, run.dontrun = TRUE)
27 | \dontrun{
28 | # files containing example data for SNP data and the phenotype
29 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
31 | pdat <- read.csv(pfile, header = TRUE)
32 | snp <- read.snpData(gfile, sep = ",")
33 | clust <- qtcatClust(snp)
34 | geno <- qtcatGeno(snp, clust)
35 | pheno <- qtcatPheno(names = pdat[, 1],
36 |                     pheno = pdat[, 2],
37 |                     covariates = model.matrix(~ pdat[, 3]))
38 | fitted <- qtcatHit(pheno, geno)
39 | 
40 | # fitting a LM to the phenotype and QTC medoids
41 | lmfitted <- lmQtc(fitted, pheno, geno)
42 | }
43 | 
44 | }
45 | 


--------------------------------------------------------------------------------
/man/medoQtc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \name{medoQtc}
 4 | \alias{medoQtc}
 5 | \title{Find medoids of QTCs}
 6 | \usage{
 7 | medoQtc(object, geno, alpha = 0.05, min.absCor = 0.05)
 8 | }
 9 | \arguments{
10 | \item{object}{an object of class \code{\link{qtcatHit}}.}
11 | 
12 | \item{geno}{an object of class \code{\link{qtcatGeno}}.}
13 | 
14 | \item{alpha}{an alpha level for significance estimation.}
15 | 
16 | \item{min.absCor}{minimum absolute value of correlation considered.}
17 | }
18 | \description{
19 | Find a medoid of for each quantitative trait cluster (QTC).
20 | }
21 | \examples{
22 | # If you want to run the examples, use:
23 | # example(medoQtc, run.dontrun = TRUE)
24 | \dontrun{
25 | # files containing example data for SNP data and the phenotype
26 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
27 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
28 | pdat <- read.csv(pfile, header = TRUE)
29 | snp <- read.snpData(gfile, sep = ",")
30 | clust <- qtcatClust(snp)
31 | geno <- qtcatGeno(snp, clust)
32 | pheno <- qtcatPheno(names = pdat[, 1],
33 |                     pheno = pdat[, 2],
34 |                     covariates = model.matrix(~ pdat[, 3]))
35 | fitted <- qtcatHit(pheno, geno)
36 | 
37 | # QTC medoids
38 | medo <- medoQtc(fitted, geno)
39 | }
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/man/naFreq-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpData.R
 3 | \docType{methods}
 4 | \name{naFreq,snpMatrix-method}
 5 | \alias{naFreq,snpMatrix-method}
 6 | \title{NA frequency}
 7 | \usage{
 8 | \S4method{naFreq}{snpMatrix}(x, dim = 1)
 9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 | 
13 | \item{dim}{a integer for dimension. 1 (default) is for rows (individuals), 2 is for
14 | columns (SNPs).}
15 | }
16 | \description{
17 | NA frequency in an object of class \linkS4class{snpMatrix}.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | na1 <- naFreq(snp, 1)
24 | na2 <- naFreq(snp, 2)
25 | 
26 | }
27 | 


--------------------------------------------------------------------------------
/man/naFreq.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGeneric.R
 3 | \name{naFreq}
 4 | \alias{naFreq}
 5 | \title{Missing Data Frequency.}
 6 | \usage{
 7 | naFreq(x, dim = 1L)
 8 | }
 9 | \arguments{
10 | \item{x}{an object, for which a corresponding method exists.}
11 | 
12 | \item{dim}{dimension over which heterozygosity is estimated. 1 (default) is for rows
13 | (individuals), 2 is for columns (SNPs).}
14 | }
15 | \description{
16 | Frequency of missing Data in data set.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/plotQtc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \name{plotQtc}
 4 | \alias{plotQtc}
 5 | \title{Plot resulting QTCs of the Hierarchical Inference Test}
 6 | \usage{
 7 | plotQtc(x, alpha = 0.05, xlab = "Chromosomes",
 8 |   ylab = expression(-log[10](italic(p))), col.axis = NULL, ...)
 9 | }
10 | \arguments{
11 | \item{x}{an object of class \code{\link{qtcatHit}}.}
12 | 
13 | \item{alpha}{an alpha level for significance estimation.}
14 | 
15 | \item{xlab}{a title for the x axis.}
16 | 
17 | \item{ylab}{a title for the y axis.}
18 | 
19 | \item{col.axis}{colors for axis line, tick marks, and title respectively.}
20 | 
21 | \item{...}{other graphical parameters may also be passed as arguments to this function.}
22 | }
23 | \description{
24 | Plot the QTCs (significant cluster of SNPs) at their
25 | position at the genome.
26 | }
27 | \examples{
28 | # If you want to run the examples, use:
29 | # example(plotQtc, run.dontrun = TRUE)
30 | \dontrun{
31 | # files containing example data for SNP data and the phenotype
32 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
33 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
34 | pdat <- read.csv(pfile, header = TRUE)
35 | snp <- read.snpData(gfile, sep = ",")
36 | clust <- qtcatClust(snp)
37 | geno <- qtcatGeno(snp, clust)
38 | pheno <- qtcatPheno(names = pdat[, 1],
39 |                     pheno = pdat[, 2],
40 |                     covariates = model.matrix(~ pdat[, 3]))
41 | fitted <- qtcatHit(pheno, geno)
42 | 
43 | # Plot the QTCs (loci37, loci260, and loci367 are causal)
44 | plotQtc(fitted)
45 | }
46 | 
47 | }
48 | 


--------------------------------------------------------------------------------
/man/plotSelFreq.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \name{plotSelFreq}
 4 | \alias{plotSelFreq}
 5 | \title{Plot markers selection frequencies of the Hierarchical Inference Test}
 6 | \usage{
 7 | plotSelFreq(x, xlab = "Chromosomes", ylab = "Sel. freq.", col.axis = NULL,
 8 |   ...)
 9 | }
10 | \arguments{
11 | \item{x}{an object of class \code{\link{qtcatHit}}.}
12 | 
13 | \item{xlab}{a title for the x axis.}
14 | 
15 | \item{ylab}{a title for the y axis.}
16 | 
17 | \item{col.axis}{colors for axis line, tick marks, and title respectively.}
18 | 
19 | \item{...}{other graphical parameters may also be passed as arguments to this function.}
20 | }
21 | \description{
22 | Plot markers selection frequencies at their
23 | position at the genome.
24 | }
25 | \examples{
26 | # If you want to run the examples, use:
27 | # example(plotSelFreq, run.dontrun = TRUE)
28 | \dontrun{
29 | # files containing example data for SNP data and the phenotype
30 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
31 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
32 | pdat <- read.csv(pfile, header = TRUE)
33 | snp <- read.snpData(gfile, sep = ",")
34 | clust <- qtcatClust(snp)
35 | geno <- qtcatGeno(snp, clust)
36 | pheno <- qtcatPheno(names = pdat[, 1],
37 |                     pheno = pdat[, 2],
38 |                     covariates = model.matrix(~ pdat[, 3]))
39 | fitted <- qtcatHit(pheno, geno)
40 | 
41 | # Plot the selection frequncy of markers (loci37, loci260, and loci367 are causal)
42 | plotSelFreq(fitted)
43 | }
44 | 
45 | }
46 | 


--------------------------------------------------------------------------------
/man/qtcat-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/qtcatPackage.R
 3 | \docType{package}
 4 | \name{qtcat-package}
 5 | \alias{qtcat-package}
 6 | \alias{qtcat}
 7 | \title{Quantitative Trait Cluster Association Test}
 8 | \description{
 9 | All SNPs are jointly associated to the phenotype and at the same time
10 | correlation among them is considered. Thus, correction for population structure becomes
11 | unnecessary, which in many cases results in a power advantages compared to other methods.
12 | }
13 | \author{
14 | Jonas Klasen
15 | }
16 | 


--------------------------------------------------------------------------------
/man/qtcatClust.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpCluster.R
 3 | \name{qtcatClust}
 4 | \alias{qtcatClust}
 5 | \title{Hierarchical clustering for big SNP data sets.}
 6 | \usage{
 7 | qtcatClust(snp, k, identicals = TRUE, maxNeigbours = 100, nLocal = 10,
 8 |   method = "complete", mc.cores = 1, trace = FALSE, ...)
 9 | }
10 | \arguments{
11 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
12 | 
13 | \item{k}{a positive integer specifying the number of clusters, less than the number of
14 | observations.}
15 | 
16 | \item{identicals}{logical, if zero clustering.}
17 | 
18 | \item{maxNeigbours}{a positive integer, specifying the maximum number of randomized
19 | searches.}
20 | 
21 | \item{nLocal}{a positive integer, specifying the number of optimisation runs. Columns
22 | have to be similar to \code{snp}.}
23 | 
24 | \item{method}{see hclust.}
25 | 
26 | \item{mc.cores}{a number of cores for parallel computing. See \code{mclapply} in package
27 | parallel for details.}
28 | 
29 | \item{trace}{logical, if \code{TRUE} it prints current status of the program.}
30 | 
31 | \item{...}{additional argruments for \code{\link[fastcluster]{hclust}}}
32 | }
33 | \description{
34 | A three step approximated hierarchical clustering of SNPs suitable to
35 | large data sets.
36 | }
37 | \examples{
38 | # file containing example data for SNP data
39 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
40 | snp <- read.snpData(gfile, sep = ",")
41 | 
42 | clust <- qtcatClust(snp)
43 | 
44 | }
45 | \seealso{
46 | clarans
47 | }
48 | 


--------------------------------------------------------------------------------
/man/qtcatGeno.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \name{qtcatGeno}
 4 | \alias{qtcatGeno}
 5 | \title{A genotype object constructor}
 6 | \usage{
 7 | qtcatGeno(snp, snpClust, absCor, min.absCor = 0.5, mc.cores = 1)
 8 | }
 9 | \arguments{
10 | \item{snp}{an object of S4 class \linkS4class{snpMatrix}.}
11 | 
12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.}
13 | 
14 | \item{absCor}{a vector of absolute value of correlations considered in the hierarchy.}
15 | 
16 | \item{min.absCor}{a minimum absolute value of correlation which is considered. A value
17 | in the range from 0 to 1.}
18 | 
19 | \item{mc.cores}{a number of cores for parallelising. The maximum is \code{'B'}. For
20 | details see \code{\link[parallel]{mclapply}}.}
21 | }
22 | \description{
23 | Constructs a S3-object containing a SNP design matrix and a hierarchy. If
24 | a SNP in the input object contains missing data, the clustering is used to impute
25 | information from highly correlated neighbor SNPs. The genotype object is needed for
26 | \code{\link{qtcatHit}} as input.
27 | }
28 | \examples{
29 | # file containing example data for SNP data
30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
31 | snp <- read.snpData(gfile, sep = ",")
32 | clust <- qtcatClust(snp)
33 | 
34 | # Construct geotype object
35 | geno <- qtcatGeno(snp, clust)
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/man/qtcatHit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \name{qtcatHit}
 4 | \alias{qtcatHit}
 5 | \title{Fitting Hierarchical Inference Testing}
 6 | \usage{
 7 | qtcatHit(pheno, geno, B = 50, p.samp1 = 0.35, nfolds = 5,
 8 |   overall.lambda = FALSE, lambda.opt = "lambda.1se", alpha = 1,
 9 |   gamma = seq(0.05, 0.99, by = 0.01), max.p.esti = 1, seed = 12321,
10 |   mc.cores = 1, trace = FALSE, ...)
11 | }
12 | \arguments{
13 | \item{pheno}{an object of class \code{\link{qtcatPheno}}.}
14 | 
15 | \item{geno}{an object of class \code{\link{qtcatGeno}}.}
16 | 
17 | \item{B}{a integer indicating the number of sample-splits.}
18 | 
19 | \item{p.samp1}{a value specifying the fraction of data used for the LASSO sample-split.
20 | The ANOVA sample-split is \code{1 - p.samp1}.}
21 | 
22 | \item{nfolds}{Number of folds (default is 5). See \code{\link[glmnet]{cv.glmnet}} for
23 | more details.}
24 | 
25 | \item{overall.lambda}{Logical, if true, lambda is estimated once, if false (default),
26 | lambda is estimated for each sample split.}
27 | 
28 | \item{lambda.opt}{a criterion for optimum selection of cross validated lasso. Either
29 | "lambda.1se" (default) or "lambda.min". See
30 | \code{\link[glmnet]{cv.glmnet}} for more details.}
31 | 
32 | \item{alpha}{a single value in the range of 0 to 1 for the elastic net mixing parameter.}
33 | 
34 | \item{gamma}{a vector of gamma-values used in significance estimation.}
35 | 
36 | \item{max.p.esti}{a maximum for computed p-values. All p-values above this value are set
37 | to one. Small \code{max.p.esti} values reduce computing time.}
38 | 
39 | \item{seed}{a RNG seed, see \code{\link{set.seed}}.}
40 | 
41 | \item{mc.cores}{a number of cores for parallelising. The maximum is
42 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.}
43 | 
44 | \item{trace}{logical, if \code{TRUE} it prints the current status of the program.}
45 | 
46 | \item{...}{additional arguments for \code{\link[glmnet]{cv.glmnet}}.}
47 | }
48 | \description{
49 | Hierarchical inference testing for phenotype-SNP association.
50 | }
51 | \examples{
52 | # If you want to run the examples, use:
53 | # example(qtcatHit, run.dontrun = TRUE)
54 | \dontrun{
55 | # files containing example data for SNP data and the phenotype
56 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
57 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
58 | pdat <- read.csv(pfile, header = TRUE)
59 | snp <- read.snpData(gfile, sep = ",")
60 | clust <- qtcatClust(snp)
61 | geno <- qtcatGeno(snp, clust)
62 | pheno <- qtcatPheno(names = pdat[, 1],
63 |                     pheno = pdat[, 2],
64 |                     covariates = model.matrix(~ pdat[, 3]))
65 | 
66 | # fitting HIT
67 | fitted <- qtcatHit(pheno, geno)
68 | }
69 | 
70 | }
71 | 


--------------------------------------------------------------------------------
/man/qtcatPheno.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \name{qtcatPheno}
 4 | \alias{qtcatPheno}
 5 | \title{A phenotype object constructor}
 6 | \usage{
 7 | qtcatPheno(names, pheno, family = "gaussian", covariates = NULL)
 8 | }
 9 | \arguments{
10 | \item{names}{a vector of individual names of length 'n'.}
11 | 
12 | \item{pheno}{a vector of length 'n' or a matrix size 'n x 2' in case of binomial family.
13 | This contains the phenotypic observations.}
14 | 
15 | \item{family}{a character string specifying the family of the phenotype distribution.
16 | Either "gaussian" (default) or "binomial".}
17 | 
18 | \item{covariates}{a matrix typically generated by a call of
19 | \code{\link[stats]{model.matrix}}. It contain additional variables influencing the
20 | phenotype e.g. environmental and experimental covariates.}
21 | }
22 | \description{
23 | Constructs an S3-object containing phenotype and if additional covariats
24 | exist a design matrix of those. The phenotype object is needed as input for
25 | \code{\link{qtcatHit}}.
26 | }
27 | \examples{
28 | # file containing example data for a phenotype.
29 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
30 | pdat <- read.csv(pfile, header = TRUE)
31 | 
32 | # Construct phenotype object
33 | pheno <- qtcatPheno(names = pdat[, 1],
34 |                     pheno = pdat[, 2],
35 |                     covariates = model.matrix(~ pdat[, 3]))
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/man/qtcatQtc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \name{qtcatQtc}
 4 | \alias{qtcatQtc}
 5 | \title{Summarize results of Hierarchical Inference Test}
 6 | \usage{
 7 | qtcatQtc(object, alpha = 0.05, min.absCor = 0.05)
 8 | }
 9 | \arguments{
10 | \item{object}{an object of class \code{\link{qtcatHit}}.}
11 | 
12 | \item{alpha}{an alpha level for significance estimation.}
13 | 
14 | \item{min.absCor}{a minimum absolute value of correlation to be considered.}
15 | }
16 | \description{
17 | Summarizing the QTCs (significant cluster of SNPs) and their position at
18 | the genome.
19 | }
20 | \examples{
21 | # If you want to run the examples, use:
22 | # example(qtcatQtc, run.dontrun = TRUE)
23 | \dontrun{
24 | # files containing example data for SNP data and the phenotype
25 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
26 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
27 | pdat <- read.csv(pfile, header = TRUE)
28 | snp <- read.snpData(gfile, sep = ",")
29 | clust <- qtcatClust(snp)
30 | geno <- qtcatGeno(snp, clust)
31 | pheno <- qtcatPheno(names = pdat[, 1],
32 |                     pheno = pdat[, 2],
33 |                     covariates = model.matrix(~ pdat[, 3]))
34 | fitted <- qtcatHit(pheno, geno)
35 | 
36 | # Summarizing the QTCs (loci37, loci260, and loci367 are causal)
37 | qtcatQtc(fitted)
38 | }
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/man/read.snpData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpData.R
 3 | \name{read.snpData}
 4 | \alias{read.snpData}
 5 | \title{Read SNP tables as a snpMatrix object}
 6 | \usage{
 7 | read.snpData(file, sep = " ", quote = "\\"", na.string = "NA",
 8 |   nrows = -1L)
 9 | }
10 | \arguments{
11 | \item{file}{the name of the file which the data are to be read from. If it does not
12 | contain an absolute path, the file name is relative to the current working directory,
13 | \code{getwd()}. Tilde-expansion is performed where supported.}
14 | 
15 | \item{sep}{a field separator character. Values on each line of the file are separated
16 | by this character.}
17 | 
18 | \item{quote}{the set of quoting characters. To disable quoting altogether, use
19 | \code{quote = ""}.}
20 | 
21 | \item{na.string}{a string which is  interpreted as NA value.}
22 | 
23 | \item{nrows}{a integer, the maximum number of rows to read.}
24 | }
25 | \description{
26 | Reads a file in table format and as a \linkS4class{snpMatrix} object.
27 | }
28 | \examples{
29 | # file containing example data for SNP data
30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
31 | snp <- read.snpData(gfile, sep = ",")
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/man/rename.leafs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpCluster.R
 3 | \name{rename.leafs}
 4 | \alias{rename.leafs}
 5 | \title{Rename dendrogram leafs}
 6 | \usage{
 7 | rename.leafs(dend, labels)
 8 | }
 9 | \arguments{
10 | \item{dend}{a dendrogram.}
11 | 
12 | \item{labels}{a vector of new names.}
13 | }
14 | \description{
15 | Rename dendrogram leafs.
16 | }
17 | \keyword{internal}
18 | 


--------------------------------------------------------------------------------
/man/snpInfo-qtcatHit-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/associationTest.R
 3 | \docType{methods}
 4 | \name{snpInfo,qtcatHit-method}
 5 | \alias{snpInfo,qtcatHit-method}
 6 | \title{Get position from qtcatHit object}
 7 | \usage{
 8 | \S4method{snpInfo}{qtcatHit}(object)
 9 | }
10 | \arguments{
11 | \item{object}{an object of class \code{\link{qtcatHit}}.}
12 | }
13 | \description{
14 | Genetic position info from an object of class qtcatHit.
15 | }
16 | 


--------------------------------------------------------------------------------
/man/snpInfo-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpData.R
 3 | \docType{methods}
 4 | \name{snpInfo,snpMatrix-method}
 5 | \alias{snpInfo,snpMatrix-method}
 6 | \title{Get position from snpMatrix}
 7 | \usage{
 8 | \S4method{snpInfo}{snpMatrix}(object)
 9 | }
10 | \arguments{
11 | \item{object}{an object of class \linkS4class{snpMatrix}.}
12 | }
13 | \description{
14 | Genetic position info from an object of class
15 |  \linkS4class{snpMatrix}.
16 | }
17 | \examples{
18 | # file containing example data for SNP data
19 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
20 | snp <- read.snpData(gfile, sep = ",")
21 | info <- snpInfo(snp)
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/man/snpInfo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllGeneric.R
 3 | \name{snpInfo}
 4 | \alias{snpInfo}
 5 | \title{Extract genomic position and allele information.}
 6 | \usage{
 7 | snpInfo(object)
 8 | }
 9 | \arguments{
10 | \item{object}{an object, for which a corresponding method exists.}
11 | }
12 | \description{
13 | Extract genomic position and allele information from object.
14 | }
15 | 


--------------------------------------------------------------------------------
/man/snpMatrix-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/AllClass.R
 3 | \docType{class}
 4 | \name{snpMatrix-class}
 5 | \alias{snpMatrix-class}
 6 | \title{A S4 class to represent a SNP-matrix}
 7 | \description{
 8 | A S4 class to represent a SNP matrix. Storing SNP information, by using a
 9 | byte-level (raw) storage scheme, jointly with genomic position and allele information.
10 | }
11 | \section{Slots}{
12 | 
13 | \describe{
14 | \item{\code{snpData}}{a matrix of SNPs stored in type 'raw'. 00 is NA, 01 homozygote AA, 02
15 | heterozygote AB, and 03 homozygote BB.}
16 | 
17 | \item{\code{snpInfo}}{data.frame with four columns. The first col. contains the chromosomes,
18 | the second col. the positions, the third col. the first allele and the fourth the second
19 | allele.}
20 | 
21 | \item{\code{dim}}{an integer vector with exactly two non-negative values.}
22 | 
23 | \item{\code{dimnames}}{a list of length two; each component containing NULL or a character vector
24 | length equal the corresponding dim element.}
25 | }}
26 | 
27 | 


--------------------------------------------------------------------------------
/man/sub-snpMatrix-ANY-ANY-missing-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/snpData.R
 3 | \docType{methods}
 4 | \name{[,snpMatrix,ANY,ANY,missing-method}
 5 | \alias{[,snpMatrix,ANY,ANY,missing-method}
 6 | \title{Subsetting snpMatrix}
 7 | \usage{
 8 | \S4method{[}{snpMatrix,ANY,ANY,missing}(x, i, j, ..., drop = TRUE)
 9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 | 
13 | \item{i}{a indices specifying elements to extract or replace. Indices are booleans,
14 | numeric or character vectors.}
15 | 
16 | \item{j}{indices specifying elements to extract or replace. Indices are booleans,
17 | numeric or character vectors.}
18 | 
19 | \item{...}{not implemented.}
20 | 
21 | \item{drop}{not implemented.}
22 | }
23 | \description{
24 | Subsetting an object of class \linkS4class{snpMatrix}.
25 | }
26 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
  1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
  2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
  3 | 
  4 | #include <Rcpp.h>
  5 | 
  6 | using namespace Rcpp;
  7 | 
  8 | // read_snpData
  9 | Rcpp::List read_snpData(Rcpp::CharacterVector file, char sep, char quote, bool rowNames, Rcpp::CharacterVector na_str, int nrows);
 10 | RcppExport SEXP _qtcat_read_snpData(SEXP fileSEXP, SEXP sepSEXP, SEXP quoteSEXP, SEXP rowNamesSEXP, SEXP na_strSEXP, SEXP nrowsSEXP) {
 11 | BEGIN_RCPP
 12 |     Rcpp::RObject rcpp_result_gen;
 13 |     Rcpp::RNGScope rcpp_rngScope_gen;
 14 |     Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type file(fileSEXP);
 15 |     Rcpp::traits::input_parameter< char >::type sep(sepSEXP);
 16 |     Rcpp::traits::input_parameter< char >::type quote(quoteSEXP);
 17 |     Rcpp::traits::input_parameter< bool >::type rowNames(rowNamesSEXP);
 18 |     Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type na_str(na_strSEXP);
 19 |     Rcpp::traits::input_parameter< int >::type nrows(nrowsSEXP);
 20 |     rcpp_result_gen = Rcpp::wrap(read_snpData(file, sep, quote, rowNames, na_str, nrows));
 21 |     return rcpp_result_gen;
 22 | END_RCPP
 23 | }
 24 | // corDist
 25 | double corDist(RawVector x, RawVector y);
 26 | RcppExport SEXP _qtcat_corDist(SEXP xSEXP, SEXP ySEXP) {
 27 | BEGIN_RCPP
 28 |     Rcpp::RObject rcpp_result_gen;
 29 |     Rcpp::RNGScope rcpp_rngScope_gen;
 30 |     Rcpp::traits::input_parameter< RawVector >::type x(xSEXP);
 31 |     Rcpp::traits::input_parameter< RawVector >::type y(ySEXP);
 32 |     rcpp_result_gen = Rcpp::wrap(corDist(x, y));
 33 |     return rcpp_result_gen;
 34 | END_RCPP
 35 | }
 36 | // corDists
 37 | NumericVector corDists(RawMatrix x);
 38 | RcppExport SEXP _qtcat_corDists(SEXP xSEXP) {
 39 | BEGIN_RCPP
 40 |     Rcpp::RObject rcpp_result_gen;
 41 |     Rcpp::RNGScope rcpp_rngScope_gen;
 42 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
 43 |     rcpp_result_gen = Rcpp::wrap(corDists(x));
 44 |     return rcpp_result_gen;
 45 | END_RCPP
 46 | }
 47 | // corPreIdenticals
 48 | List corPreIdenticals(RawMatrix x, const int step);
 49 | RcppExport SEXP _qtcat_corPreIdenticals(SEXP xSEXP, SEXP stepSEXP) {
 50 | BEGIN_RCPP
 51 |     Rcpp::RObject rcpp_result_gen;
 52 |     Rcpp::RNGScope rcpp_rngScope_gen;
 53 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
 54 |     Rcpp::traits::input_parameter< const int >::type step(stepSEXP);
 55 |     rcpp_result_gen = Rcpp::wrap(corPreIdenticals(x, step));
 56 |     return rcpp_result_gen;
 57 | END_RCPP
 58 | }
 59 | // corIdenticals
 60 | List corIdenticals(RawMatrix x, IntegerVector clustIdx);
 61 | RcppExport SEXP _qtcat_corIdenticals(SEXP xSEXP, SEXP clustIdxSEXP) {
 62 | BEGIN_RCPP
 63 |     Rcpp::RObject rcpp_result_gen;
 64 |     Rcpp::RNGScope rcpp_rngScope_gen;
 65 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
 66 |     Rcpp::traits::input_parameter< IntegerVector >::type clustIdx(clustIdxSEXP);
 67 |     rcpp_result_gen = Rcpp::wrap(corIdenticals(x, clustIdx));
 68 |     return rcpp_result_gen;
 69 | END_RCPP
 70 | }
 71 | // joinCorIdenticals
 72 | List joinCorIdenticals(int n, List preclust, List ClustMedo);
 73 | RcppExport SEXP _qtcat_joinCorIdenticals(SEXP nSEXP, SEXP preclustSEXP, SEXP ClustMedoSEXP) {
 74 | BEGIN_RCPP
 75 |     Rcpp::RObject rcpp_result_gen;
 76 |     Rcpp::RNGScope rcpp_rngScope_gen;
 77 |     Rcpp::traits::input_parameter< int >::type n(nSEXP);
 78 |     Rcpp::traits::input_parameter< List >::type preclust(preclustSEXP);
 79 |     Rcpp::traits::input_parameter< List >::type ClustMedo(ClustMedoSEXP);
 80 |     rcpp_result_gen = Rcpp::wrap(joinCorIdenticals(n, preclust, ClustMedo));
 81 |     return rcpp_result_gen;
 82 | END_RCPP
 83 | }
 84 | // corClarans
 85 | List corClarans(RawMatrix x, const int k, const int maxNeigbours);
 86 | RcppExport SEXP _qtcat_corClarans(SEXP xSEXP, SEXP kSEXP, SEXP maxNeigboursSEXP) {
 87 | BEGIN_RCPP
 88 |     Rcpp::RObject rcpp_result_gen;
 89 |     Rcpp::RNGScope rcpp_rngScope_gen;
 90 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
 91 |     Rcpp::traits::input_parameter< const int >::type k(kSEXP);
 92 |     Rcpp::traits::input_parameter< const int >::type maxNeigbours(maxNeigboursSEXP);
 93 |     rcpp_result_gen = Rcpp::wrap(corClarans(x, k, maxNeigbours));
 94 |     return rcpp_result_gen;
 95 | END_RCPP
 96 | }
 97 | // corMedoids
 98 | IntegerVector corMedoids(RawMatrix x, IntegerVector clusters);
 99 | RcppExport SEXP _qtcat_corMedoids(SEXP xSEXP, SEXP clustersSEXP) {
100 | BEGIN_RCPP
101 |     Rcpp::RObject rcpp_result_gen;
102 |     Rcpp::RNGScope rcpp_rngScope_gen;
103 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
104 |     Rcpp::traits::input_parameter< IntegerVector >::type clusters(clustersSEXP);
105 |     rcpp_result_gen = Rcpp::wrap(corMedoids(x, clusters));
106 |     return rcpp_result_gen;
107 | END_RCPP
108 | }
109 | // design
110 | NumericMatrix design(RawMatrix x);
111 | RcppExport SEXP _qtcat_design(SEXP xSEXP) {
112 | BEGIN_RCPP
113 |     Rcpp::RObject rcpp_result_gen;
114 |     Rcpp::RNGScope rcpp_rngScope_gen;
115 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
116 |     rcpp_result_gen = Rcpp::wrap(design(x));
117 |     return rcpp_result_gen;
118 | END_RCPP
119 | }
120 | // afreq
121 | NumericVector afreq(RawMatrix x, bool maf);
122 | RcppExport SEXP _qtcat_afreq(SEXP xSEXP, SEXP mafSEXP) {
123 | BEGIN_RCPP
124 |     Rcpp::RObject rcpp_result_gen;
125 |     Rcpp::RNGScope rcpp_rngScope_gen;
126 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
127 |     Rcpp::traits::input_parameter< bool >::type maf(mafSEXP);
128 |     rcpp_result_gen = Rcpp::wrap(afreq(x, maf));
129 |     return rcpp_result_gen;
130 | END_RCPP
131 | }
132 | // hetfreq
133 | NumericVector hetfreq(RawMatrix x, int dim);
134 | RcppExport SEXP _qtcat_hetfreq(SEXP xSEXP, SEXP dimSEXP) {
135 | BEGIN_RCPP
136 |     Rcpp::RObject rcpp_result_gen;
137 |     Rcpp::RNGScope rcpp_rngScope_gen;
138 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
139 |     Rcpp::traits::input_parameter< int >::type dim(dimSEXP);
140 |     rcpp_result_gen = Rcpp::wrap(hetfreq(x, dim));
141 |     return rcpp_result_gen;
142 | END_RCPP
143 | }
144 | // nafreq
145 | NumericVector nafreq(RawMatrix x, int dim);
146 | RcppExport SEXP _qtcat_nafreq(SEXP xSEXP, SEXP dimSEXP) {
147 | BEGIN_RCPP
148 |     Rcpp::RObject rcpp_result_gen;
149 |     Rcpp::RNGScope rcpp_rngScope_gen;
150 |     Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
151 |     Rcpp::traits::input_parameter< int >::type dim(dimSEXP);
152 |     rcpp_result_gen = Rcpp::wrap(nafreq(x, dim));
153 |     return rcpp_result_gen;
154 | END_RCPP
155 | }
156 | 
157 | static const R_CallMethodDef CallEntries[] = {
158 |     {"_qtcat_read_snpData", (DL_FUNC) &_qtcat_read_snpData, 6},
159 |     {"_qtcat_corDist", (DL_FUNC) &_qtcat_corDist, 2},
160 |     {"_qtcat_corDists", (DL_FUNC) &_qtcat_corDists, 1},
161 |     {"_qtcat_corPreIdenticals", (DL_FUNC) &_qtcat_corPreIdenticals, 2},
162 |     {"_qtcat_corIdenticals", (DL_FUNC) &_qtcat_corIdenticals, 2},
163 |     {"_qtcat_joinCorIdenticals", (DL_FUNC) &_qtcat_joinCorIdenticals, 3},
164 |     {"_qtcat_corClarans", (DL_FUNC) &_qtcat_corClarans, 3},
165 |     {"_qtcat_corMedoids", (DL_FUNC) &_qtcat_corMedoids, 2},
166 |     {"_qtcat_design", (DL_FUNC) &_qtcat_design, 1},
167 |     {"_qtcat_afreq", (DL_FUNC) &_qtcat_afreq, 2},
168 |     {"_qtcat_hetfreq", (DL_FUNC) &_qtcat_hetfreq, 2},
169 |     {"_qtcat_nafreq", (DL_FUNC) &_qtcat_nafreq, 2},
170 |     {NULL, NULL, 0}
171 | };
172 | 
173 | RcppExport void R_init_qtcat(DllInfo *dll) {
174 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
175 |     R_useDynamicSymbols(dll, FALSE);
176 | }
177 | 


--------------------------------------------------------------------------------
/src/ReadData.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | #include <iostream>
  3 | #include <fstream>
  4 | #include <string>
  5 | 
  6 | using namespace std;
  7 | 
  8 | void split(const string &s, char delim, vector<string> &elems);
  9 | 
 10 | // [[Rcpp::export]]
 11 | Rcpp::List read_snpData(Rcpp::CharacterVector file, char sep, char quote,
 12 |                         bool rowNames, Rcpp::CharacterVector na_str,
 13 |                         int nrows) {
 14 |     string oneLine;
 15 |     vector<string> lineElements;
 16 |     const string fname = Rcpp::as<string>(file);
 17 |     const string naStr = Rcpp::as<string>(na_str);
 18 |     // Input file stream
 19 |     ifstream fileIn(fname.c_str());
 20 |     // Column names (Individual names)
 21 |     getline(fileIn, oneLine);
 22 |     // Delete qoutes if exist
 23 |     if (quote != ' ') {
 24 |         oneLine.erase(remove(oneLine.begin(), oneLine.end(), quote), oneLine.end());
 25 |     }
 26 |     // check if sep is part of first line
 27 |     if (oneLine.find(sep) == string::npos) {
 28 |         Rcpp::stop("In the first line the separator character 'sep' doesn't exist");
 29 |     }
 30 |     // split string in to vector of strings
 31 |     split(oneLine, sep, lineElements);
 32 |     unsigned int row = lineElements.size();
 33 |     // Individual names
 34 |     Rcpp::CharacterVector indivNames = Rcpp::wrap(lineElements);
 35 |     //
 36 |     int nosnpcount = 0;
 37 |     vector<string> lociNames;
 38 |     vector<string> chr;
 39 |     vector<int> pos;
 40 |     set<char> allelesSet;
 41 |     vector<char> allelepos;
 42 |     string AA;
 43 |     string AB;
 44 |     string BA;
 45 |     string BB;
 46 |     vector<char> snpData;
 47 |     int col = 0;
 48 |     int posStart = 0;
 49 |     int dataStart = 2;
 50 |     if (rowNames) {
 51 |         dataStart ++;
 52 |         posStart ++;
 53 |     }
 54 |     int line = 0;
 55 |     while(getline(fileIn, oneLine)) {
 56 |         line ++;
 57 |         lineElements.clear();
 58 |         allelesSet.clear();
 59 |         // Delete qoutes if exist
 60 |         if (quote != ' ') {
 61 |             oneLine.erase(remove(oneLine.begin(), oneLine.end(), quote), oneLine.end());
 62 |         }
 63 |         // split string in to vector of strings
 64 |         split(oneLine, sep, lineElements);
 65 |         // length of line
 66 |         if (row != lineElements.size()){
 67 |             Rcpp::stop("Error: Length of line ", line, " is ",
 68 |                 lineElements.size(), " instead of ", row);
 69 |         }
 70 |         // detect Nucleotides in this line (SNP)
 71 |         for (unsigned int i = dataStart; i < lineElements.size(); ++ i) {
 72 |             if (lineElements[i] != naStr) {
 73 |                 allelesSet.insert(lineElements[i].begin(), lineElements[i].end());
 74 |             }
 75 |         }
 76 |         vector<char> allele(allelesSet.begin(), allelesSet.end());
 77 |         // if more not two alleles skip
 78 |         if (allele.size() != 2) {
 79 |             nosnpcount ++;
 80 |             continue;
 81 |         }
 82 |         allelepos.push_back(allele[0]);
 83 |         allelepos.push_back(allele[1]);
 84 |         // if two alleles make new coding
 85 |         AA = allele[0]; AA += allele[0];
 86 |         AB = allele[0]; AB += allele[1];
 87 |         BA = allele[1]; BA += allele[0];
 88 |         BB = allele[1]; BB += allele[1];
 89 |         // raw coding of line (SNP)
 90 |         for (unsigned int i = dataStart; i < lineElements.size(); i ++) {
 91 |             if (lineElements[i] == AA) {
 92 |                 snpData.push_back(0x01);
 93 |             } else if (lineElements[i] == BB) {
 94 |                 snpData.push_back(0x03);
 95 |             } else if ((lineElements[i] == AB) |
 96 |                        (lineElements[i] == BA)) {
 97 |                 snpData.push_back(0x02);
 98 |             } else {
 99 |                 snpData.push_back(0x00);
100 |             }
101 |         }
102 |         // SNP names
103 |         if (rowNames) {
104 |             lociNames.push_back(lineElements[0]);
105 |         }
106 |         // genetic positions
107 |         chr.push_back(lineElements[posStart]);
108 |         pos.push_back(atoi(lineElements[posStart + 1].c_str()));
109 |         col ++;
110 |         if ((nrows > 0) && (nrows <= line)) {
111 |             break;
112 |         }
113 |     }
114 |     if (snpData.size() == 0)
115 |         Rcpp::stop("No valid SNP found");
116 |     if (nosnpcount > 0)
117 |         Rcpp::Rcerr << "note: " << nosnpcount << " of " << line <<
118 |           " lines have more or less than two alleles and are therefore not considered"
119 |           << endl;
120 |     // Rcpp conversioan and vector as matrix
121 |     // alleles
122 |     Rcpp::CharacterVector alleles = Rcpp::wrap(allelepos);
123 |     alleles.attr("dim") = Rcpp::Dimension(2, col);
124 |     // snpData
125 |     Rcpp::RawVector snpOutData(snpData.size());
126 |     copy(snpData.begin(), snpData.end(), snpOutData.begin());
127 |     row  = snpOutData.size() / col;
128 |     snpOutData.attr("dim") = Rcpp::Dimension(row, col);
129 |     // individual names
130 |     indivNames.erase(indivNames.begin(), indivNames.begin() + dataStart);
131 |     // Results as List
132 |     Rcpp::List  out = Rcpp::List::create(Rcpp::Named("snpData", snpOutData),
133 |                                          Rcpp::Named("alleles", alleles),
134 |                                          Rcpp::Named("chr", chr),
135 |                                          Rcpp::Named("pos", pos),
136 |                                          Rcpp::Named("lociNames", lociNames),
137 |                                          Rcpp::Named("indivNames", indivNames));
138 |     return out;
139 | }
140 | 
141 | // split string in to vector of strings
142 | void split(const string &s, char delim, vector<string> &elems) {
143 |     stringstream ss(s);
144 |     string item;
145 |     while (getline(ss, item, delim)) {
146 |         elems.push_back(item);
147 |     }
148 | }
149 | 


--------------------------------------------------------------------------------
/src/snpCluster.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | 
  3 | using namespace Rcpp;
  4 | 
  5 | // correlation
  6 | double cor(RawVector x, RawVector y) {
  7 |     int n = x.size();
  8 |     int n_na = 0;
  9 |     double ex = 0, ey = 0, xt = 0, yt = 0, sxx = 0, syy = 0, sxy = 0;
 10 |     // setup variables and find the mean
 11 |     for (int i = 0; i < n; i ++) {
 12 |         if ((x[i] == 0x00) | (y[i] == 0x00)) {
 13 |             n_na ++;
 14 |             continue;
 15 |         }
 16 |         ex += x[i];
 17 |         ey += y[i];
 18 |     }
 19 |     if ((n - n_na) < 1)
 20 |         stop("estimation of correlation not possible, no complete element pairs");
 21 |     ex /= (n - n_na);
 22 |     ey /= (n - n_na);
 23 |     // correlation coefficent
 24 |     for (int i = 0; i < n; i ++) {
 25 |         if ((x[i] == 0x00) | (y[i] == 0x00))
 26 |             continue;
 27 |         xt = x[i] - ex;
 28 |         yt = y[i] - ey;
 29 |         sxx += xt * xt;
 30 |         syy += yt * yt;
 31 |         sxy += xt * yt;
 32 |     }
 33 |     double cor = sxy / (sqrt(sxx * syy) + 1e-16);
 34 |     return cor;
 35 | } // cor
 36 | 
 37 | //  1 x 1 correlation distance
 38 | // [[Rcpp::export]]
 39 | double corDist(RawVector x, RawVector y) {
 40 |    double dcor = 1 - std::fabs(cor(x, y));
 41 |    return dcor;
 42 | }
 43 | 
 44 | // distance from all marker
 45 | // [[Rcpp::export]]
 46 | NumericVector corDists(RawMatrix x) {
 47 |     int n = x.ncol();
 48 |     NumericVector dist(n * (n -1) / 2);
 49 |     uint64_t count = 0;
 50 |     for (int i = 0; i < n - 1; i ++) {
 51 |         for (int j = i + 1; j < n; j ++) {
 52 |             dist[count] = corDist(x(_, i), x(_, j));
 53 |             count ++;
 54 |         }
 55 |     }
 56 |    return dist;
 57 | } // corDists
 58 | 
 59 | //pre cluster for identical search
 60 | // [[Rcpp::export]]
 61 | List corPreIdenticals(RawMatrix x, const int step) {
 62 |     int n = x.ncol();
 63 |     IntegerVector clusters(n);
 64 |     std::map< int, std::vector<int> > preclust;
 65 |     std::vector<int> premedoInx;
 66 |     int k = 0;
 67 |     if (n >= step * 2) {
 68 |         // medoids equally distributed over the search space
 69 |         bool smallDist = false;
 70 |         int startStep = (int)step * .5;
 71 |         double medodist = 0;
 72 |         premedoInx.push_back(startStep);
 73 |         for (int i = startStep + step; i < n; i += step) {
 74 |             for(int j = startStep; j < i; j += step) {
 75 |               medodist = corDist(x(_, i), x(_, j));
 76 |               if (medodist < .05) {
 77 |                   smallDist = true;
 78 |                   break;
 79 |               }
 80 |             }
 81 |             if (smallDist) {
 82 |               smallDist = false;
 83 |               continue;
 84 |             }
 85 |             premedoInx.push_back(i);
 86 |         }
 87 |         k = premedoInx.size();
 88 |     }
 89 |     int j0;
 90 |     if (k > 1) {
 91 |         // cluster all variables to the nearest medoid
 92 |         NumericVector predist(k);
 93 |         for (int i = 0; i < n; i ++) {
 94 |             for (int j = 0; j < k; j ++) {
 95 |                 j0 = premedoInx[j];
 96 |                 predist[j] = corDist(x(_, i), x(_, j0));
 97 |             }
 98 |             preclust[which_min(predist)].push_back(i);
 99 |         }
100 |     } else {
101 |         for (int i = 0; i < n; i ++) {
102 |             preclust[0].push_back(i);
103 |         }
104 |     }
105 |     return List::create(preclust);
106 | } // preClustIdenticals
107 | 
108 | // find identicals
109 | // [[Rcpp::export]]
110 | List corIdenticals(RawMatrix x, IntegerVector clustIdx) {
111 |     int nCluster = clustIdx.size();
112 |     double dist = 0;
113 |     IntegerVector clusters(nCluster);
114 |     int clustCount = 1;
115 |     std::vector<int> grp, medoInx;
116 |     int gMedo, i0, j0;
117 |     for (int i = 0; i < nCluster; i ++) {
118 |         i0 = clustIdx[i];
119 |         if (clusters[i] == 0) {
120 |             clusters[i] = clustCount;
121 |             grp.push_back(i);
122 |             for (int j = i + 1; j < nCluster; j ++) {
123 |                 j0 = clustIdx[j];
124 |                 if (clusters[j] == 0) {
125 |                     dist = corDist(x(_, i0), x(_, j0));
126 |                     if (dist <= 1e-7) {
127 |                         clusters[j] = clustCount;
128 |                         grp.push_back(j);
129 |                     }
130 |                 }
131 |             }
132 |             gMedo = grp.size() / 2;
133 |             medoInx.push_back(grp[gMedo]);
134 |             clustCount ++;
135 |             grp.clear();
136 |         }
137 |     }
138 |     return List::create(clusters, medoInx);
139 | } // identicals
140 | 
141 | // join data to one object
142 | // [[Rcpp::export]]
143 | List joinCorIdenticals(int n, List preclust, List ClustMedo) {
144 |      IntegerVector clusters(n);
145 |      std::vector<int> medoInx;
146 |      List SubClustMedo;
147 |      IntegerVector clustIdx, clust, medo;
148 |      int count = 0, j0 = 0;
149 |      for (int i = 0; i < preclust.size(); i ++) {
150 |          clustIdx = preclust[i];
151 |          SubClustMedo = ClustMedo[i];
152 |          clust = SubClustMedo[0];
153 |          medo = SubClustMedo[1];
154 |          for (int j = 0; j < clustIdx.size(); j ++) {
155 |              j0 = clustIdx[j];
156 |              clusters[j0] = clust[j] + count;
157 |          }
158 |          for (int j = 0; j < medo.size(); j ++) {
159 |              j0 = medo[j];
160 |              medoInx.push_back(clustIdx[j0]);
161 |          }
162 |          count = medoInx.size();
163 |      }
164 |      return List::create(clusters, medoInx);
165 | } // joinIdenticals
166 | 
167 | 
168 | // clustering of IDB by correlation dictamce with CLARANS
169 | // [[Rcpp::export]]
170 | List corClarans(RawMatrix x, const int k, const int maxNeigbours) {
171 |     RNGScope scope;
172 |     Environment base("package:base");
173 |     Function sample_int = base["sample.int"];
174 |     int n = x.ncol();
175 |     NumericMatrix medoDist(n, k);
176 |     IntegerVector medoInx(k);
177 |     double dist = 0;
178 |     IntegerVector clusters(n);
179 |     // starting medoid and clusters
180 |     NumericVector pdist(n, 1.0);
181 |     int m = 0;
182 |     for (int c = 0; c < k; c ++) {
183 |         // starting medoid
184 |         m = as<int>(sample_int(n, 1, false, pdist))-1;
185 |         medoInx[c] = m;
186 |         for (int i= 0; i < n; i ++) {
187 |             dist = corDist(x(_, m), x(_, i));
188 |             medoDist(i, c) = dist;
189 |         }
190 |     }
191 |     int clust = 0;
192 |     double costs = 0, objective = 0;
193 |     for (int i= 0; i < n; i ++) {
194 |         // starting cluster membership
195 |         clust = which_min(medoDist(i, _));
196 |         clusters[i] = clust;
197 |         costs += medoDist(i, clust);
198 |     }
199 |     objective = costs / n;
200 |     IntegerVector ranVar(maxNeigbours);
201 |     int i = 0, o = 0;
202 |     double minDist = 0;
203 |     NumericVector objectDist(n);
204 |     NumericVector objectMinDist(n);
205 |     double object_cost = 0;
206 |     // iteration of inner clarans loop
207 |     int iter = 0;
208 |     while (iter < maxNeigbours) {
209 |         if ( iter == 0) {
210 |             ranVar = floor(runif(maxNeigbours, 0, n - 1e-15));
211 |         }
212 |         // replacing medoids with objects if costs are smaller
213 |         i = ranVar[iter];
214 |         o = clusters[i];
215 |         for (int j = 0; j < n; j ++) {
216 |             dist = corDist(x(_, i), x(_, j));
217 |             objectDist[j] = dist;
218 |             minDist = dist;
219 |             for (int c = 0; c < k; c ++) {
220 |                 if (c != o) {
221 |                     minDist = std::min(minDist, medoDist(j, c));
222 |                 }
223 |             }
224 |             objectMinDist[j] = minDist;
225 |         }
226 |         object_cost = sum(objectMinDist);
227 |         if (object_cost < costs) {
228 |             // new medoids
229 |             medoDist(_, o) = objectDist;
230 |             medoInx[o] = i;
231 |             //  new costs
232 |             costs = object_cost;
233 |             objective = costs / n;
234 |             // new membership to cluster
235 |             for (int j = 0; j < n; j ++) {
236 |                 clusters[j] = which_min(medoDist(j, _));
237 |             }
238 |             // restarting loop
239 |             iter = 0;
240 |         } else {
241 |             iter ++;
242 |         }
243 |     }
244 |     // Result as list
245 |     return List::create(clusters + 1, medoInx, objective);
246 | } // clarans
247 | 
248 | 
249 | // medoids of SNP clusters
250 | // [[Rcpp::export]]
251 | IntegerVector corMedoids(RawMatrix x, IntegerVector clusters) {
252 |     int n = clusters.size();
253 |     // map of vectors with cluster indexes
254 |     std::map< int, std::vector<int> > indexMap;
255 |     for (int i = 0; i < n; i ++) {
256 |         indexMap[clusters[i]].push_back(i);
257 |     }
258 |     // for each cluster a dist matrix were the col with min dist is the medoid
259 |     std::vector<int> clustInx;
260 |     int nClust = 0;
261 |     double dist = 0;
262 |     int dist_j, dist_i;
263 |     std::vector<int> medoInx;
264 |     for (std::map< int, std::vector<int> >::iterator it = indexMap.begin();
265 |          it != indexMap.end(); it ++) {
266 |         clustInx = it->second;
267 |         nClust = clustInx.size();
268 |         NumericMatrix dist_mat(nClust, nClust);
269 |         NumericVector dist_sums(nClust);
270 |         for (int j = 0; j < nClust - 1; j ++) {
271 |           dist_j = clustInx[j];
272 |             for (int i = j; i < nClust; i ++) {
273 |                 if ( i == j) {
274 |                     dist_mat(j, i) = 0;
275 |                 } else {
276 |                    dist_i = clustInx[i];
277 |                    dist = corDist(x(_, dist_j), x(_, dist_i));
278 |                    dist_mat(j, i) = dist;
279 |                    dist_mat(i, j) = dist;
280 |                 }
281 |             }
282 |         }
283 |         for (int i = 0; i < nClust; i ++) {
284 |             dist_sums[i] = sum(dist_mat(_, i));
285 |         }
286 |         medoInx.push_back(clustInx[which_min(dist_sums)] + 1);
287 |     }
288 |     return wrap(medoInx);
289 | }
290 | 


--------------------------------------------------------------------------------
/src/snpData.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | #include <algorithm>
  3 | #include <string>
  4 | 
  5 | using namespace Rcpp;
  6 | 
  7 | 
  8 | // [[Rcpp::export]]
  9 | NumericMatrix design(RawMatrix x) {
 10 |     size_t n = x.nrow();
 11 |     size_t p = x.ncol();
 12 |     NumericMatrix out(n, p);
 13 |     for (size_t i = 0; i < p; i ++) {
 14 |         for (size_t j = 0; j < n; j ++) {
 15 |             if (x(j, i) == 0x01) {
 16 |                 out(j, i) = 0;
 17 |             } else if (x(j, i) == 0x03) {
 18 |                 out(j, i) = 1;
 19 |             } else if (x(j, i) == 0x02) {
 20 |               out(j, i) = .5;
 21 |             }
 22 |         }
 23 |     }
 24 |     return out;
 25 | }
 26 | 
 27 | 
 28 | // [[Rcpp::export]]
 29 | NumericVector afreq(RawMatrix x, bool maf) {
 30 |     size_t p = x.ncol();
 31 |     size_t n = x.nrow();
 32 |     size_t n_na = 0;
 33 |     double a = 0;
 34 |     double af = 0;
 35 |     NumericVector allfreq(p);
 36 |     for (size_t i = 0; i < p; i ++) {
 37 |         for (size_t j = 0; j < n; j ++) {
 38 |             if (x(j, i) == 0x00) {
 39 |                 n_na ++;
 40 |             } else if (x(j, i) == 0x01) {
 41 |                 a ++;
 42 |             } else if (x(j, i) == 0x02) {
 43 |                 a += .5;
 44 |             }
 45 |         }
 46 |         af = a / (n - n_na);
 47 |         if (maf) {
 48 |             if (af <= .5) {
 49 |                 allfreq[i] = af;
 50 |             } else {
 51 |                 allfreq[i] = 1 - af;
 52 |             }
 53 |         } else {
 54 |             allfreq[i] = af;
 55 |         }
 56 |         n_na = 0;
 57 |         a = 0;
 58 |     }
 59 |     return allfreq;
 60 | }
 61 | 
 62 | 
 63 | // [[Rcpp::export]]
 64 | NumericVector hetfreq(RawMatrix x, int dim) {
 65 |     size_t k, l;
 66 |     if (dim == 1) {
 67 |         k = x.nrow();
 68 |         l = x.ncol();
 69 |     } else {
 70 |         k = x.ncol();
 71 |         l = x.nrow();
 72 |     }
 73 |     size_t l_na = 0;
 74 |     double het = 0;
 75 |     NumericVector hetf(k);
 76 |     for (size_t i = 0; i < k; i ++) {
 77 |         for (size_t j = 0; j < l; j ++) {
 78 |             if (dim == 1) {
 79 |                 if (x(i, j) == 0x00) {
 80 |                     l_na ++;
 81 |                 } else if (x(i, j) == 0x02) {
 82 |                     het ++;
 83 |                 }
 84 |             } else {
 85 |                 if (x(j, i) == 0x00) {
 86 |                     l_na ++;
 87 |                 } else if (x(j, i) == 0x02) {
 88 |                     het ++;
 89 |                 }
 90 |             }
 91 |         }
 92 |         hetf[i] = het / (l - l_na);
 93 |         het = 0;
 94 |         l_na = 0;
 95 |     }
 96 |     return hetf;
 97 | }
 98 | 
 99 | 
100 | // [[Rcpp::export]]
101 | NumericVector nafreq(RawMatrix x, int dim) {
102 |   size_t k, l;
103 |     if (dim == 1) {
104 |         k = x.nrow();
105 |         l = x.ncol();
106 |     } else {
107 |         k = x.ncol();
108 |         l = x.nrow();
109 |     }
110 |     double na = 0;
111 |     NumericVector naf(k);
112 |     for (size_t i = 0; i < k; i ++) {
113 |         for (size_t j = 0; j < l; j ++) {
114 |             if (dim == 1) {
115 |                 if (x(i, j) == 0x00)
116 |                     na ++;
117 |             } else {
118 |                 if (x(j, i) == 0x00)
119 |                     na ++;
120 |             }
121 |         }
122 |         naf[i] = na / l;
123 |         na = 0;
124 |     }
125 |     return naf;
126 | }
127 | 


--------------------------------------------------------------------------------