├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── R
├── AllClass.R
├── AllGeneric.R
├── RcppExports.R
├── associationTest.R
├── qtcatPackage.R
├── snpCluster.R
├── snpData.R
└── snpImpute.R
├── README.md
├── appveyor.yml
├── inst
└── extdata
│ ├── phenodata.csv
│ └── snpdata.csv
├── man
├── alleleFreq-snpMatrix-method.Rd
├── alleleFreq.Rd
├── as.matrix-snpMatrix-method.Rd
├── as.snpMatrix.Rd
├── clarans.Rd
├── cutClust.Rd
├── distCor.Rd
├── hetFreq-snpMatrix-method.Rd
├── hetFreq.Rd
├── identicals.Rd
├── imputeMedoids.Rd
├── imputeSnp.Rd
├── imputeSnpIter.Rd
├── imputeSnpMatrix.Rd
├── lmQtc.Rd
├── medoQtc.Rd
├── naFreq-snpMatrix-method.Rd
├── naFreq.Rd
├── plotQtc.Rd
├── plotSelFreq.Rd
├── qtcat-package.Rd
├── qtcatClust.Rd
├── qtcatGeno.Rd
├── qtcatHit.Rd
├── qtcatPheno.Rd
├── qtcatQtc.Rd
├── read.snpData.Rd
├── rename.leafs.Rd
├── snpInfo-qtcatHit-method.Rd
├── snpInfo-snpMatrix-method.Rd
├── snpInfo.Rd
├── snpMatrix-class.Rd
└── sub-snpMatrix-ANY-ANY-missing-method.Rd
└── src
├── RcppExports.cpp
├── ReadData.cpp
├── snpCluster.cpp
└── snpData.cpp
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | tests/*
4 | ^packrat/
5 | .Rprofile
6 | ^\.travis\.yml$
7 | ^appveyor\.yml$
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .Rprofile
4 | .RData
5 | qtcat.Rproj
6 | src/*.o
7 | src/*.so
8 | src/*.dll
9 | inst/doc
10 | packrat/lib*
11 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: r
2 | cache: packages
3 |
4 | matrix:
5 | include:
6 | - os: linux
7 | - os: osx
8 |
9 | warnings_are_errors: true
10 |
11 | # r_github_packages:
12 | # - jrklasen/hit
13 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: qtcat
2 | Title: Quantitative Trait Cluster Association Test
3 | Description:
4 | All SNPs are jointly associated to the phenotype and at the same time correlation among
5 | them is considered. Thus, correction for population structure becomes unnecessary,
6 | which in many cases results in a power advantages compared to other methods.
7 | Authors@R: person("Jonas", "Klasen", role = c("aut", "cre"), email = "qtcat@gmx.de")
8 | Version: 0.4.3
9 | URL: http://github.com/QTCAT/qtcat
10 | BugReports: https://github.com/QTCAT/qtcat/issues
11 | Depends:
12 | R (>= 3.2.0)
13 | Imports:
14 | methods,
15 | stats,
16 | parallel,
17 | graphics,
18 | utils,
19 | Rcpp (>= 0.11.0),
20 | fastcluster (>= 1.1.16),
21 | hit (>= 0.4.0)
22 | Suggests:
23 | knitr,
24 | rmarkdown
25 | LinkingTo: Rcpp
26 | License: GPL (>= 2)
27 | RoxygenNote: 6.0.1
28 | VignetteBuilder: knitr
29 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(alleleFreq)
4 | export(as.snpMatrix)
5 | export(clarans)
6 | export(cutClust)
7 | export(distCor)
8 | export(hetFreq)
9 | export(identicals)
10 | export(imputeSnpMatrix)
11 | export(lmQtc)
12 | export(medoQtc)
13 | export(naFreq)
14 | export(plotQtc)
15 | export(plotSelFreq)
16 | export(qtcatClust)
17 | export(qtcatGeno)
18 | export(qtcatHit)
19 | export(qtcatPheno)
20 | export(qtcatQtc)
21 | export(read.snpData)
22 | export(snpInfo)
23 | exportClasses(snpMatrix)
24 | exportMethods("[")
25 | exportMethods(alleleFreq)
26 | exportMethods(as.matrix)
27 | exportMethods(hetFreq)
28 | exportMethods(naFreq)
29 | exportMethods(snpInfo)
30 | importFrom(Rcpp,evalCpp)
31 | importFrom(fastcluster,hclust)
32 | importFrom(graphics,axis)
33 | importFrom(graphics,mtext)
34 | importFrom(graphics,plot)
35 | importFrom(hit,as.hierarchy)
36 | importFrom(hit,hit)
37 | importFrom(methods,is)
38 | importFrom(methods,new)
39 | importFrom(methods,setClass)
40 | importFrom(methods,setGeneric)
41 | importFrom(methods,setMethod)
42 | importFrom(methods,setOldClass)
43 | importFrom(methods,signature)
44 | importFrom(parallel,mclapply)
45 | importFrom(stats,as.dendrogram)
46 | importFrom(stats,cor)
47 | importFrom(stats,dendrapply)
48 | importFrom(stats,is.leaf)
49 | importFrom(stats,lm)
50 | importFrom(stats,na.omit)
51 | importFrom(stats,optimise)
52 | importFrom(stats,reorder)
53 | importFrom(utils,installed.packages)
54 | useDynLib(qtcat)
55 |
--------------------------------------------------------------------------------
/R/AllClass.R:
--------------------------------------------------------------------------------
1 | #' @title A S4 class to represent a SNP-matrix
2 | #'
3 | #' @description A S4 class to represent a SNP matrix. Storing SNP information, by using a
4 | #' byte-level (raw) storage scheme, jointly with genomic position and allele information.
5 | #'
6 | #' @slot snpData a matrix of SNPs stored in type 'raw'. 00 is NA, 01 homozygote AA, 02
7 | #' heterozygote AB, and 03 homozygote BB.
8 | #' @slot snpInfo data.frame with four columns. The first col. contains the chromosomes,
9 | #' the second col. the positions, the third col. the first allele and the fourth the second
10 | #' allele.
11 | #' @slot dim an integer vector with exactly two non-negative values.
12 | #' @slot dimnames a list of length two; each component containing NULL or a character vector
13 | #' length equal the corresponding dim element.
14 | #'
15 | #' @importFrom methods setClass
16 | #' @export
17 | setClass("snpMatrix",
18 | slots = c(snpData = "matrix",
19 | snpInfo = "data.frame",
20 | dim = "integer",
21 | dimnames = "list"))
22 |
--------------------------------------------------------------------------------
/R/AllGeneric.R:
--------------------------------------------------------------------------------
1 | #' @title Extract genomic position and allele information.
2 | #'
3 | #' @description Extract genomic position and allele information from object.
4 | #'
5 | #' @param object an object, for which a corresponding method exists.
6 | #'
7 | #' @importFrom methods setGeneric
8 | #' @export
9 | setGeneric("snpInfo", function(object) standardGeneric("snpInfo"))
10 |
11 |
12 | #' @title Allele Frequency.
13 | #'
14 | #' @description Frequency of alleles in data set.
15 | #'
16 | #' @param x an object, for which a corresponding method exists.
17 | #' @param maf logical, if true minor allele frequency (default), other ways allele
18 | #' frequency of 'allele.1'.
19 | #'
20 | #' @importFrom methods setGeneric
21 | #' @export
22 | setGeneric("alleleFreq", function(x, maf = TRUE) standardGeneric("alleleFreq"))
23 |
24 |
25 | #' @title Heterozygosity Frequency.
26 | #'
27 | #' @description Frequency of heterozygosity in data set.
28 | #'
29 | #' @param x an object, for which a corresponding method exists.
30 | #' @param dim dimension over which heterozygosity is estimated. 1 (default) is for rows
31 | #' (individuals), 2 is for columns (SNPs).
32 | #'
33 | #' @importFrom methods setGeneric
34 | #' @export
35 | setGeneric("hetFreq", function(x, dim = 1L) standardGeneric("hetFreq"))
36 |
37 |
38 | #' @title Missing Data Frequency.
39 | #'
40 | #' @description Frequency of missing Data in data set.
41 | #'
42 | #' @param x an object, for which a corresponding method exists.
43 | #' @param dim dimension over which heterozygosity is estimated. 1 (default) is for rows
44 | #' (individuals), 2 is for columns (SNPs).
45 | #'
46 | #' @importFrom methods setGeneric
47 | #' @export
48 | setGeneric("naFreq", function(x, dim = 1L) standardGeneric("naFreq"))
49 |
--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | read_snpData <- function(file, sep, quote, rowNames, na_str, nrows) {
5 | .Call('_qtcat_read_snpData', PACKAGE = 'qtcat', file, sep, quote, rowNames, na_str, nrows)
6 | }
7 |
8 | corDist <- function(x, y) {
9 | .Call('_qtcat_corDist', PACKAGE = 'qtcat', x, y)
10 | }
11 |
12 | corDists <- function(x) {
13 | .Call('_qtcat_corDists', PACKAGE = 'qtcat', x)
14 | }
15 |
16 | corPreIdenticals <- function(x, step) {
17 | .Call('_qtcat_corPreIdenticals', PACKAGE = 'qtcat', x, step)
18 | }
19 |
20 | corIdenticals <- function(x, clustIdx) {
21 | .Call('_qtcat_corIdenticals', PACKAGE = 'qtcat', x, clustIdx)
22 | }
23 |
24 | joinCorIdenticals <- function(n, preclust, ClustMedo) {
25 | .Call('_qtcat_joinCorIdenticals', PACKAGE = 'qtcat', n, preclust, ClustMedo)
26 | }
27 |
28 | corClarans <- function(x, k, maxNeigbours) {
29 | .Call('_qtcat_corClarans', PACKAGE = 'qtcat', x, k, maxNeigbours)
30 | }
31 |
32 | corMedoids <- function(x, clusters) {
33 | .Call('_qtcat_corMedoids', PACKAGE = 'qtcat', x, clusters)
34 | }
35 |
36 | design <- function(x) {
37 | .Call('_qtcat_design', PACKAGE = 'qtcat', x)
38 | }
39 |
40 | afreq <- function(x, maf) {
41 | .Call('_qtcat_afreq', PACKAGE = 'qtcat', x, maf)
42 | }
43 |
44 | hetfreq <- function(x, dim) {
45 | .Call('_qtcat_hetfreq', PACKAGE = 'qtcat', x, dim)
46 | }
47 |
48 | nafreq <- function(x, dim) {
49 | .Call('_qtcat_nafreq', PACKAGE = 'qtcat', x, dim)
50 | }
51 |
52 |
--------------------------------------------------------------------------------
/R/associationTest.R:
--------------------------------------------------------------------------------
1 | #' @title A genotype object constructor
2 | #'
3 | #' @description Constructs a S3-object containing a SNP design matrix and a hierarchy. If
4 | #' a SNP in the input object contains missing data, the clustering is used to impute
5 | #' information from highly correlated neighbor SNPs. The genotype object is needed for
6 | #' \code{\link{qtcatHit}} as input.
7 | #'
8 | #' @param snp an object of S4 class \linkS4class{snpMatrix}.
9 | #' @param snpClust an object of class \code{\link{qtcatClust}}.
10 | #' @param absCor a vector of absolute value of correlations considered in the hierarchy.
11 | #' @param min.absCor a minimum absolute value of correlation which is considered. A value
12 | #' in the range from 0 to 1.
13 | #' @param mc.cores a number of cores for parallelising. The maximum is \code{'B'}. For
14 | #' details see \code{\link[parallel]{mclapply}}.
15 | #'
16 | #' @examples
17 | #' # file containing example data for SNP data
18 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
19 | #' snp <- read.snpData(gfile, sep = ",")
20 | #' clust <- qtcatClust(snp)
21 | #'
22 | #' # Construct geotype object
23 | #' geno <- qtcatGeno(snp, clust)
24 | #'
25 | #' @importFrom hit as.hierarchy
26 | #' @importFrom methods is
27 | #' @importFrom stats reorder
28 | #' @export
29 | qtcatGeno <- function(snp, snpClust, absCor, min.absCor = 0.5, mc.cores = 1) {
30 | stopifnot(is(snp, "snpMatrix"))
31 | stopifnot(is(snpClust, "qtcatClust"))
32 | if (!setequal(names(snpClust$clusters), colnames(snp)))
33 | stop("Names of 'snp' and 'snpClust' differ")
34 | if (missing(absCor))
35 | hier <- as.hierarchy(snpClust$dendrogram, 1 - min.absCor, names = colnames(snp))
36 | else
37 | hier <- as.hierarchy(snpClust$dendrogram, height = 1 - absCor, names = colnames(snp))
38 | if (any(naFreq(snp) > 0))
39 | snp <- imputeMedoids(snp, snpClust, hier, .25, mc.cores)
40 | if (is.null(names <- snpClust$medoids))
41 | names <- names(snpClust)
42 | snpnames <- colnames(snp)[colnames(snp) %in% names]
43 | desMat <- as.matrix(snp[, snpnames])
44 | hier <- reorder(hier, snpnames)
45 | out <- list(x = desMat,
46 | hierarchy = hier,
47 | clusters = snpClust$clusters,
48 | medoids = snpClust$medoids,
49 | snpInfo = snpInfo(snp))
50 | class(out) <- "qtcatGeno"
51 | out
52 | }
53 |
54 | #' @title A phenotype object constructor
55 | #'
56 | #' @description Constructs an S3-object containing phenotype and if additional covariats
57 | #' exist a design matrix of those. The phenotype object is needed as input for
58 | #' \code{\link{qtcatHit}}.
59 | #'
60 | #' @param names a vector of individual names of length 'n'.
61 | #' @param pheno a vector of length 'n' or a matrix size 'n x 2' in case of binomial family.
62 | #' This contains the phenotypic observations.
63 | #' @param family a character string specifying the family of the phenotype distribution.
64 | #' Either "gaussian" (default) or "binomial".
65 | #' @param covariates a matrix typically generated by a call of
66 | #' \code{\link[stats]{model.matrix}}. It contain additional variables influencing the
67 | #' phenotype e.g. environmental and experimental covariates.
68 | #'
69 | #' @examples
70 | #' # file containing example data for a phenotype.
71 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
72 | #' pdat <- read.csv(pfile, header = TRUE)
73 | #'
74 | #' # Construct phenotype object
75 | #' pheno <- qtcatPheno(names = pdat[, 1],
76 | #' pheno = pdat[, 2],
77 | #' covariates = model.matrix(~ pdat[, 3]))
78 | #'
79 | #' @export
80 | qtcatPheno <- function(names, pheno, family = "gaussian", covariates = NULL) {
81 | match.arg(family, c("gaussian", "binomial"))
82 | nn <- length(names)
83 | if (any(is.na(pheno)))
84 | stop("Missing values in 'pheno' are not allowed")
85 | if (!is.null(covariates)) {
86 | if (is.matrix(covariates)) {
87 | nc <- nrow(covariates)
88 | if (all(covariates[, 1] == 1))
89 | covariates <- covariates[, -1, drop = FALSE]
90 | } else {
91 | stop("'covariates' has to be a 'matrix'")
92 | }
93 | } else {
94 | covariates <- matrix(nrow = nn, ncol = 0L)
95 | nc <- nn
96 | }
97 | if (is.vector(pheno)) {
98 | np <- length(pheno)
99 | } else if (is.matrix(pheno) && family == "binomial") {
100 | if (ncol(pheno) > 2)
101 | stop("'phone' has more than two columns which is not allowed")
102 | np <- nrow(pheno)
103 | } else {
104 | stop("'pheno' has to be a 'vector' or in in case of binomial family a 'matrix'")
105 | }
106 | if (nn == nc && nn == np)
107 | out <- list(names = as.character(names),
108 | pheno = pheno,
109 | covariates = covariates,
110 | family = family)
111 | class(out) <- "qtcatPheno"
112 | out
113 | }
114 |
115 |
116 | #' @title Fitting Hierarchical Inference Testing
117 | #'
118 | #' @description Hierarchical inference testing for phenotype-SNP association.
119 | #'
120 | #' @param pheno an object of class \code{\link{qtcatPheno}}.
121 | #' @param geno an object of class \code{\link{qtcatGeno}}.
122 | #' @param B a integer indicating the number of sample-splits.
123 | #' @param p.samp1 a value specifying the fraction of data used for the LASSO sample-split.
124 | #' The ANOVA sample-split is \code{1 - p.samp1}.
125 | #' @param nfolds Number of folds (default is 5). See \code{\link[glmnet]{cv.glmnet}} for
126 | #' more details.
127 | #' @param overall.lambda Logical, if true, lambda is estimated once, if false (default),
128 | #' lambda is estimated for each sample split.
129 | #' @param lambda.opt a criterion for optimum selection of cross validated lasso. Either
130 | #' "lambda.1se" (default) or "lambda.min". See
131 | #' \code{\link[glmnet]{cv.glmnet}} for more details.
132 | #' @param alpha a single value in the range of 0 to 1 for the elastic net mixing parameter.
133 | #' @param gamma a vector of gamma-values used in significance estimation.
134 | #' @param max.p.esti a maximum for computed p-values. All p-values above this value are set
135 | #' to one. Small \code{max.p.esti} values reduce computing time.
136 | #' @param seed a RNG seed, see \code{\link{set.seed}}.
137 | #' @param mc.cores a number of cores for parallelising. The maximum is
138 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}.
139 | #' @param trace logical, if \code{TRUE} it prints the current status of the program.
140 | #' @param ... additional arguments for \code{\link[glmnet]{cv.glmnet}}.
141 | #'
142 | #' @examples
143 | #' # If you want to run the examples, use:
144 | #' # example(qtcatHit, run.dontrun = TRUE)
145 | #' \dontrun{
146 | #' # files containing example data for SNP data and the phenotype
147 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
148 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
149 | #' pdat <- read.csv(pfile, header = TRUE)
150 | #' snp <- read.snpData(gfile, sep = ",")
151 | #' clust <- qtcatClust(snp)
152 | #' geno <- qtcatGeno(snp, clust)
153 | #' pheno <- qtcatPheno(names = pdat[, 1],
154 | #' pheno = pdat[, 2],
155 | #' covariates = model.matrix(~ pdat[, 3]))
156 | #'
157 | #' # fitting HIT
158 | #' fitted <- qtcatHit(pheno, geno)
159 | #' }
160 | #'
161 | #' @importFrom methods is
162 | #' @export
163 | qtcatHit <- function(pheno, geno, B = 50, p.samp1 = 0.35,
164 | nfolds = 5, overall.lambda = FALSE, lambda.opt = "lambda.1se",
165 | alpha = 1, gamma = seq(0.05, 0.99, by = 0.01),
166 | max.p.esti = 1, seed = 12321, mc.cores = 1, trace = FALSE, ...) {
167 | set.seed(seed)
168 | on.exit(set.seed(NULL))
169 | stopifnot(is(pheno, "qtcatPheno"))
170 | stopifnot(is(geno, "qtcatGeno"))
171 | id <- intersect(pheno$names, rownames(geno$x))
172 | if (!length(id))
173 | stop("The ID intersect of 'pheno' and 'geno' is emty")
174 | if (length(id.uniqueGeno <- setdiff(rownames(geno$x), id)))
175 | cat("The following individuals are part of 'geno' but not of 'pheno':\n",
176 | paste(id.uniqueGeno, collapse = " "), "\n")
177 | if (length(id.uniquePheno <- setdiff(pheno$names, id)))
178 | cat("The following individuals are part of 'pheno' but not of 'geno':\n",
179 | paste(id.uniquePheno, collapse = " "), "\n")
180 | phenoInx <- which(pheno$names %in% id)
181 | genoInx <- match(pheno$names[phenoInx], rownames(geno$x))
182 | if (ncol(pheno$covariates) == 0L)
183 | x <- geno$x[genoInx, ]
184 | else
185 | x <- cbind(geno$x[genoInx, ], pheno$covariates[phenoInx, ])
186 | y <- pheno$pheno[phenoInx]
187 | fitHit <- hit(x, y, geno$hierarchy, pheno$family,
188 | B, p.samp1, nfolds, overall.lambda, lambda.opt, alpha,
189 | gamma, max.p.esti, mc.cores,
190 | trace, standardize = FALSE)
191 | out <- c(fitHit, geno[3L:5L])
192 | class(out) <- c("qtcatHit", "hit")
193 | out
194 | }
195 |
196 |
197 | #' @title Summarize results of Hierarchical Inference Test
198 | #'
199 | #' @description Summarizing the QTCs (significant cluster of SNPs) and their position at
200 | #' the genome.
201 | #'
202 | #' @param object an object of class \code{\link{qtcatHit}}.
203 | #' @param alpha an alpha level for significance estimation.
204 | #' @param min.absCor a minimum absolute value of correlation to be considered.
205 | #'
206 | #' @examples
207 | #' # If you want to run the examples, use:
208 | #' # example(qtcatQtc, run.dontrun = TRUE)
209 | #' \dontrun{
210 | #' # files containing example data for SNP data and the phenotype
211 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
212 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
213 | #' pdat <- read.csv(pfile, header = TRUE)
214 | #' snp <- read.snpData(gfile, sep = ",")
215 | #' clust <- qtcatClust(snp)
216 | #' geno <- qtcatGeno(snp, clust)
217 | #' pheno <- qtcatPheno(names = pdat[, 1],
218 | #' pheno = pdat[, 2],
219 | #' covariates = model.matrix(~ pdat[, 3]))
220 | #' fitted <- qtcatHit(pheno, geno)
221 | #'
222 | #' # Summarizing the QTCs (loci37, loci260, and loci367 are causal)
223 | #' qtcatQtc(fitted)
224 | #' }
225 | #'
226 | #' @importFrom hit hit
227 | #' @importFrom methods is
228 | #' @export
229 | qtcatQtc <- function(object, alpha = 0.05, min.absCor = 0.05) {
230 | stopifnot(is(object, "qtcatHit"))
231 | y <- summary(object, alpha, 1 - min.absCor)
232 | signames <- rownames(y)
233 | sigclust <- match(signames, object$medoids)
234 | sigClust <- matrix(0, length(object$clusters), 3L)
235 | sigClust[, 2L] <- 1
236 | for (i in seq_along(signames)) {
237 | inx <- which(object$clusters == sigclust[i])
238 | sigClust[inx, ] <- matrix(unlist(y[signames[i], ]),
239 | length(inx), 3L, byrow = TRUE)
240 | }
241 | rownames(sigClust) <- names(object$clusters)
242 | sigClust[, 2L] <- 1 - sigClust[, 2L]
243 | colnames(sigClust) <- c(colnames(y)[1L], "absCor", colnames(y)[3L])
244 | sigClust <- as.data.frame(sigClust[sigClust[, 1L] != 0, ,drop = FALSE])
245 | out <- cbind(snpInfo(object)[rownames(sigClust), 1:2], sigClust)
246 | out
247 | }
248 |
249 |
250 | setOldClass("qtcatHit")
251 | #' @title Get position from qtcatHit object
252 | #'
253 | #' @description Genetic position info from an object of class qtcatHit.
254 | #'
255 | #' @param object an object of class \code{\link{qtcatHit}}.
256 | #'
257 | #'
258 | #' @importFrom methods setMethod setOldClass
259 | #' @export
260 | setMethod("snpInfo", "qtcatHit",
261 | function(object) {
262 | out <- object$snpInfo
263 | if (is.null(out)) {
264 | cat("No position information available")
265 | }
266 | out
267 | }
268 | )
269 |
270 |
271 | #' @title Find medoids of QTCs
272 | #' @description Find a medoid of for each quantitative trait cluster (QTC).
273 | #'
274 | #' @param object an object of class \code{\link{qtcatHit}}.
275 | #' @param geno an object of class \code{\link{qtcatGeno}}.
276 | #' @param alpha an alpha level for significance estimation.
277 | #' @param min.absCor minimum absolute value of correlation considered.
278 | #'
279 | #' @examples
280 | #' # If you want to run the examples, use:
281 | #' # example(medoQtc, run.dontrun = TRUE)
282 | #' \dontrun{
283 | #' # files containing example data for SNP data and the phenotype
284 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
285 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
286 | #' pdat <- read.csv(pfile, header = TRUE)
287 | #' snp <- read.snpData(gfile, sep = ",")
288 | #' clust <- qtcatClust(snp)
289 | #' geno <- qtcatGeno(snp, clust)
290 | #' pheno <- qtcatPheno(names = pdat[, 1],
291 | #' pheno = pdat[, 2],
292 | #' covariates = model.matrix(~ pdat[, 3]))
293 | #' fitted <- qtcatHit(pheno, geno)
294 | #'
295 | #' # QTC medoids
296 | #' medo <- medoQtc(fitted, geno)
297 | #' }
298 | #'
299 | #' @importFrom methods is
300 | #' @importFrom stats cor
301 | #' @export
302 | medoQtc <- function(object, geno, alpha = 0.05, min.absCor = 0.05) {
303 | stopifnot(is(object, "qtcatHit"))
304 | stopifnot(is(geno, "qtcatGeno"))
305 | sigClust <- summary(object, alpha, min.absCor)
306 | if (nrow(sigClust)) {
307 | clusters <- split(rownames(sigClust), sigClust$clusters)
308 | medoids <- sapply(clusters, function(names, geno) {
309 | if (length(names) > 1L)
310 | return(names[which.max(abs(cor(geno$x[, names])))])
311 | else
312 | return(names)
313 | }, geno = geno)
314 | } else {
315 | medoids <- c()
316 | }
317 | medoids
318 | }
319 |
320 |
321 | #' @title Fitting a Linear Model to QTCs
322 | #'
323 | #' @description Linear model between phenotype and medoids of QTCs (significant SNP
324 | #' clusters).
325 | #'
326 | #' @param object an object of class \code{\link{qtcatHit}}.
327 | #' @param pheno an object of class \code{\link{qtcatPheno}}.
328 | #' @param geno an object of class \code{\link{qtcatGeno}}.
329 | #' @param alpha an alpha level for significance estimation.
330 | #' @param min.absCor minimum absolute value of correlation considered.
331 | #'
332 | #' @examples
333 | #' # If you want to run the examples, use:
334 | #' # example(lmQtc, run.dontrun = TRUE)
335 | #' \dontrun{
336 | #' # files containing example data for SNP data and the phenotype
337 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
338 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
339 | #' pdat <- read.csv(pfile, header = TRUE)
340 | #' snp <- read.snpData(gfile, sep = ",")
341 | #' clust <- qtcatClust(snp)
342 | #' geno <- qtcatGeno(snp, clust)
343 | #' pheno <- qtcatPheno(names = pdat[, 1],
344 | #' pheno = pdat[, 2],
345 | #' covariates = model.matrix(~ pdat[, 3]))
346 | #' fitted <- qtcatHit(pheno, geno)
347 | #'
348 | #' # fitting a LM to the phenotype and QTC medoids
349 | #' lmfitted <- lmQtc(fitted, pheno, geno)
350 | #' }
351 | #'
352 | #' @importFrom methods is
353 | #' @importFrom stats lm
354 | #' @export
355 | lmQtc <- function(object, pheno, geno, alpha = 0.05, min.absCor = 0.05) {
356 | stopifnot(is(object, "qtcatHit"))
357 | stopifnot(is(pheno, "qtcatPheno"))
358 | stopifnot(is(geno, "qtcatGeno"))
359 | id <- intersect(pheno$names, rownames(geno$x))
360 | phenoInx <- which(pheno$names %in% id)
361 | if (!length(id))
362 | stop("The ID intersect of 'pheno' and 'geno' is emty")
363 | if (length(id.uniqueGeno <- setdiff(rownames(geno$x), id)))
364 | cat("The following individuals are part of 'geno' but not of 'pheno':\n",
365 | paste(id.uniqueGeno, collapse = " "), "\n")
366 | if (length(id.uniquePheno <- setdiff(pheno$names, id)))
367 | cat("The following individuals are part of 'pheno' but not of 'geno':\n",
368 | paste(id.uniquePheno, collapse = " "), "\n")
369 | medoids <- medoQtc(object, geno, alpha, min.absCor)
370 | if (length(medoids)) {
371 | xg <- geno$x[, colnames(geno$x) %in% medoids, drop = FALSE]
372 | genoInx <- match(pheno$names[phenoInx], rownames(xg))
373 | rownames(xg) <- NULL
374 | if (ncol(pheno$covariates)) {
375 | dat <- data.frame(y = pheno$pheno[phenoInx],
376 | pheno$covariates[phenoInx, ],
377 | xg[genoInx, ])
378 | } else {
379 | dat <- data.frame(y = pheno$pheno[phenoInx], xg[genoInx, ])
380 | }
381 | } else if (ncol(pheno$covariates)) {
382 | dat <- data.frame(y = pheno$pheno[phenoInx],
383 | pheno$covariates[phenoInx, ])
384 | } else {
385 | dat <- data.frame(y = pheno$pheno[phenoInx])
386 | }
387 | out <- lm(y ~ ., data = dat)
388 | out
389 | }
390 |
391 |
392 | #' @title Plot resulting QTCs of the Hierarchical Inference Test
393 | #'
394 | #' @description Plot the QTCs (significant cluster of SNPs) at their
395 | #' position at the genome.
396 | #'
397 | #' @param x an object of class \code{\link{qtcatHit}}.
398 | #' @param alpha an alpha level for significance estimation.
399 | #' @param xlab a title for the x axis.
400 | #' @param ylab a title for the y axis.
401 | #' @param col.axis colors for axis line, tick marks, and title respectively.
402 | #' @param ... other graphical parameters may also be passed as arguments to this function.
403 | #'
404 | #' @examples
405 | #' # If you want to run the examples, use:
406 | #' # example(plotQtc, run.dontrun = TRUE)
407 | #' \dontrun{
408 | #' # files containing example data for SNP data and the phenotype
409 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
410 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
411 | #' pdat <- read.csv(pfile, header = TRUE)
412 | #' snp <- read.snpData(gfile, sep = ",")
413 | #' clust <- qtcatClust(snp)
414 | #' geno <- qtcatGeno(snp, clust)
415 | #' pheno <- qtcatPheno(names = pdat[, 1],
416 | #' pheno = pdat[, 2],
417 | #' covariates = model.matrix(~ pdat[, 3]))
418 | #' fitted <- qtcatHit(pheno, geno)
419 | #'
420 | #' # Plot the QTCs (loci37, loci260, and loci367 are causal)
421 | #' plotQtc(fitted)
422 | #' }
423 | #'
424 | #' @importFrom graphics plot axis mtext
425 | #' @export
426 | plotQtc <- function(x, alpha = 0.05, xlab = "Chromosomes",
427 | ylab = expression(-log[10](italic(p))), col.axis = NULL, ...) {
428 | stopifnot(is(x, "qtcatHit"))
429 | # make positions linear with gaps between chr's
430 | pos <- snpInfo(x)[, 1:2]
431 | chrminmax <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
432 | chrgap <- sum(chrminmax[2L, ]) * .01
433 | chrsize <- cumsum(c(0, chrminmax[2L, -ncol(chrminmax)])) -
434 | cumsum(chrminmax[1L, ]) +
435 | cumsum(c(0, rep(chrgap, ncol(chrminmax) - 1L)))
436 | chr <- sort(unique(pos[, 1L]))
437 | for (i in seq_along(chr)) {
438 | inx <- which(pos[, 1L] == chr[i])
439 | pos[inx, 2L] <- pos[inx, 2L] + chrsize[i]
440 | }
441 | chrstartend <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
442 | xlim <- c(chrstartend[1L, 1L] - chrgap, chrstartend[2L, ncol(chrstartend)] + chrgap)
443 | # get QTCs from result
444 | qtc <- qtcatQtc(x, alpha = alpha, min.absCor = .01)
445 | for (i in seq_along(chr)) {
446 | inx2 <- which(qtc[, 1L] == chr[i])
447 | qtc[inx2, 2L] <- qtc[inx2, 2L] + chrsize[i]
448 | }
449 | qtc$pValues[qtc$pValues] <- qtc$pValues[qtc$pValues] + 1e-308
450 | qtc$log.pValues <- -log10(qtc$pValues)
451 | # plot
452 | plot(qtc$pos, qtc$log.pValues, xlim = xlim, ylim = c(0, max(9, qtc$log.pValues) + 1),
453 | axes = FALSE, xlab = "", ylab = "", ...)
454 | # x
455 | for (i in seq_along(chr))
456 | axis(1, labels = FALSE, at = c(chrstartend[1, i], chrstartend[2, i]), col = col.axis)
457 | axis(1, at = colMeans(chrstartend), labels = chr, col = NA, col.axis = col.axis)
458 | mtext(xlab, 1, 2.5, col = col.axis)
459 | # y
460 | axis(2, col.axis = col.axis, col = col.axis)
461 | mtext(expression(-log[10](italic(p))), 2, 2.5, col = col.axis)
462 | }
463 |
464 |
465 | #' @title Plot markers selection frequencies of the Hierarchical Inference Test
466 | #'
467 | #' @description Plot markers selection frequencies at their
468 | #' position at the genome.
469 | #'
470 | #' @param x an object of class \code{\link{qtcatHit}}.
471 | #' @param xlab a title for the x axis.
472 | #' @param ylab a title for the y axis.
473 | #' @param col.axis colors for axis line, tick marks, and title respectively.
474 | #' @param ... other graphical parameters may also be passed as arguments to this function.
475 | #'
476 | #' @examples
477 | #' # If you want to run the examples, use:
478 | #' # example(plotSelFreq, run.dontrun = TRUE)
479 | #' \dontrun{
480 | #' # files containing example data for SNP data and the phenotype
481 | #' pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
482 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
483 | #' pdat <- read.csv(pfile, header = TRUE)
484 | #' snp <- read.snpData(gfile, sep = ",")
485 | #' clust <- qtcatClust(snp)
486 | #' geno <- qtcatGeno(snp, clust)
487 | #' pheno <- qtcatPheno(names = pdat[, 1],
488 | #' pheno = pdat[, 2],
489 | #' covariates = model.matrix(~ pdat[, 3]))
490 | #' fitted <- qtcatHit(pheno, geno)
491 | #'
492 | #' # Plot the selection frequncy of markers (loci37, loci260, and loci367 are causal)
493 | #' plotSelFreq(fitted)
494 | #' }
495 | #'
496 | #' @importFrom graphics plot axis mtext
497 | #' @export
498 | plotSelFreq <- function(x, xlab = "Chromosomes", ylab = "Sel. freq.",
499 | col.axis = NULL, ...) {
500 | stopifnot(is(x, "qtcatHit"))
501 | # make positions linear with gaps between chr's
502 | pos <- snpInfo(x)[, 1:2]
503 | chrminmax <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
504 | chrgap <- sum(chrminmax[2L, ]) * .01
505 | chrsize <- cumsum(c(0, chrminmax[2L, -ncol(chrminmax)])) -
506 | cumsum(chrminmax[1L, ]) +
507 | cumsum(c(0, rep(chrgap, ncol(chrminmax) - 1L)))
508 | chr <- sort(unique(pos[, 1L]))
509 | for (i in seq_along(chr)) {
510 | inx <- which(pos[, 1L] == chr[i])
511 | pos[inx, 2L] <- pos[inx, 2L] + chrsize[i]
512 | }
513 | chrstartend <- vapply(split(pos[, 2L], pos[, 1L]), function(x) c(min(x), max(x)), c(1, 2))
514 | xlim <- c(chrstartend[1L, 1L] - chrgap, chrstartend[2L, ncol(chrstartend)] + chrgap)
515 | # hit lasso selection fequency
516 | inx <- which(x$selectFreq > 0)
517 | selfreq <- cbind(pos[names(x$hier), 2L][inx], x$selectFreq[inx])
518 | # hit lasso selection fequency plot
519 | plot(selfreq, xlim = xlim, ylim = c(0, 1.1), axes = FALSE, xlab = "", ylab = "", ...)
520 | # x
521 | for (i in seq_along(chr))
522 | axis(1, labels = FALSE, at = c(chrstartend[1, i], chrstartend[2, i]), col = col.axis)
523 | axis(1, at = colMeans(chrstartend), labels = chr, col = NA, col.axis = col.axis)
524 | mtext(xlab, 1, 2.5, col = col.axis)
525 | # y
526 | axis(2, at = c(0, .5, 1), col = col.axis, col.axis = col.axis)
527 | mtext(ylab, 2, 2.5, col = col.axis)
528 | }
529 |
--------------------------------------------------------------------------------
/R/qtcatPackage.R:
--------------------------------------------------------------------------------
1 | #' @title Quantitative Trait Cluster Association Test
2 | #'
3 | #' @description All SNPs are jointly associated to the phenotype and at the same time
4 | #' correlation among them is considered. Thus, correction for population structure becomes
5 | #' unnecessary, which in many cases results in a power advantages compared to other methods.
6 | #'
7 | #' @author Jonas Klasen
8 | #'
9 | #' @useDynLib qtcat
10 | #' @importFrom Rcpp evalCpp
11 | #'
12 | #' @docType package
13 | #' @name qtcat-package
14 | #' @aliases qtcat
15 | NULL
16 |
--------------------------------------------------------------------------------
/R/snpCluster.R:
--------------------------------------------------------------------------------
1 | #' @title Correlation based distance between SNPs
2 | #'
3 | #' @description This function computes a distance matrix. The distance is estimated
4 | #' as one minus the absolute value of the correlation coefficient \code{1 - abs(cor)}.
5 | #'
6 | #' @param snp an object of class \linkS4class{snpMatrix}.
7 | #'
8 | #' @details See \code{\link[stats]{dist}} for details about the output object.
9 | #' @seealso \code{\link[stats]{dist}}
10 | #'
11 | #' @examples
12 | #' # file containing example data for SNP data
13 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
14 | #' snp <- read.snpData(gfile, sep = ",")
15 | #'
16 | #' dist <- distCor(snp[, 1:10])
17 | #'
18 | #' @importFrom methods is
19 | #' @export
20 | distCor <- function(snp) {
21 | stopifnot(is(snp, "snpMatrix"))
22 | out <- corDists(snp@snpData)
23 | attr(out,"Labels") <- colnames(snp)
24 | attr(out,"Size") <- ncol(snp)
25 | attr(out,"Diag") <- FALSE
26 | attr(out,"Upper") <- FALSE
27 | attr(out,"method") <- "1-abs(cor(snp))"
28 | attr(out,"call") <- match.call()
29 | class(out) <- "dist"
30 | out
31 | }
32 |
33 |
34 | #' @title Perfect simiarity clusters of SNP
35 | #'
36 | #' @description Finds perfect similarity cluster of SNPs. This is specially usfull in
37 | #' artificial crossing populations.
38 | #'
39 | #' @param snp an object of class \linkS4class{snpMatrix}.
40 | #' @param mc.cores a positive integer for the number of cores for parallel computing. See
41 | #' \code{\link[parallel]{mclapply}} for details.
42 | #'
43 | #' @examples
44 | #' # file containing example data for SNP data
45 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
46 | #' snp <- read.snpData(gfile, sep = ",")
47 | #'
48 | #' ident <- identicals(snp)
49 | #'
50 | #' @importFrom parallel mclapply
51 | #' @importFrom methods is
52 | #' @importFrom stats optimise
53 | #' @export
54 | identicals <- function(snp, mc.cores = 1) {
55 | stopifnot(is(snp, "snpMatrix"))
56 | p <- ncol(snp@snpData)
57 | s <- optimise(function(s, p, m) p * s + p / s * (p / s - 1) / 2 * s / m,
58 | interval = c(2, p - 1), p = p, m = mc.cores)$minimum
59 | step <- as.integer(p / (s + 1))
60 | preclust <- unlist(corPreIdenticals(snp@snpData, step), FALSE)
61 | kidenticals <- mclapply(preclust, function(i, snp) corIdenticals(snp, i),
62 | snp = snp@snpData, mc.cores = mc.cores)
63 | identclust <- joinCorIdenticals(p, preclust, kidenticals)
64 | clust <- identclust[[1L]]
65 | names(clust) <- colnames(snp)
66 | out <- list(clusters = clust,
67 | medoids = colnames(snp)[identclust[[2L]] + 1])
68 | class(out) <- "identicals"
69 | out
70 | }
71 |
72 |
73 | #' @title K-medoids clustering of SNPs using randomized search
74 | #'
75 | #' @description Partitioning (clustering) into k clusters "around medoids" by randomized
76 | #' search. \code{1-abs(cor)} is used as distance between SNPs.
77 | #'
78 | #' @param snp an object of class \linkS4class{snpMatrix}.
79 | #' @param k a positive integer specifying the number of clusters, has to be greater than
80 | #' one and less than the number of SNPs.
81 | #' @param maxNeigbours a positive integer specifying the maximum number of randomized
82 | #' searches.
83 | #' @param nLocal a positive integer specifying the number of optimisation runs.
84 | #' @param mc.cores a positive integer for the number of cores for parallel computing. See
85 | #' \code{\link[parallel]{mclapply}} for details.
86 | #'
87 | #' @details The K-medoids clustering is implemented as clustering large applications based
88 | #' upon randomized search (CLARANS) algorithm (Ng and Han 2002). CLARANS is a modification
89 | #' of the partitioning around medoids (PAM) algorithm \code{\link[cluster]{pam}}. Where the
90 | #' PAM algorithm is estimating all distances between SNPs and the respective medoids,
91 | #' CLARANS is searching a random subset of the SNPs. This is independently repeated several
92 | #' times and the result which minimises the average distance the most is reported. This
93 | #' produces results close to those of the PAM algorithm (Ng and Han 2002), though the
94 | #' number of runs and the subset size have to be arbitrarily chosen by the user. The
95 | #' algorithm has two advantages: (i) the number of distance comparisons is dramatically
96 | #' reduced; and (ii) parallelizing is straightforward.
97 | #'
98 | #' @references Ng and J. Han (2002). CLARANS: A method for clustering objects for spatial
99 | #' data mining. \emph{IEEE Transactions on Knowledge and Data Engineering}.
100 | #' \url{http://dx.doi.org/10.1109/TKDE.2002.1033770}).
101 | #'
102 | #' @examples
103 | #' # file containing example data for SNP data
104 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
105 | #' snp <- read.snpData(gfile, sep = ",")
106 | #'
107 | #' clust <- clarans(snp, 3)
108 | #'
109 | #' @importFrom parallel mclapply
110 | #' @importFrom methods is
111 | #' @export
112 | clarans <- function(snp, k, maxNeigbours = 100, nLocal = 10, mc.cores = 1) {
113 | stopifnot(is(snp, "snpMatrix"))
114 | if (missing(k))
115 | stop("'k' must be specifid")
116 | if (k < 2L)
117 | stop("'k' must be at least two")
118 | # cluster optimisation by clarans in parallel
119 | clarans.i <- function(i, snp, k, maxNeigbours) {
120 | # cluster optimisation by clarans
121 | out <- corClarans(snp@snpData, k, maxNeigbours)
122 | out
123 | }
124 | out.nLocal <- mclapply(1L:nLocal, clarans.i,
125 | snp, k, maxNeigbours,
126 | mc.cores = mc.cores)
127 | opt.func <- function(i, snp) {snp[[i]][[3L]]}
128 | all.objectives <- sapply(1:nLocal, opt.func, out.nLocal)
129 | out.opt <- out.nLocal[[which.min(all.objectives)]]
130 | clusters <- out.opt[[1L]]
131 | names(clusters) <- colnames(snp)
132 | medoids <- out.opt[[2L]] + 1
133 | names(medoids) <- colnames(snp)[medoids]
134 | # output
135 | out <- list(clusters = clusters,
136 | medoids = medoids,
137 | objective = out.opt[[3L]],
138 | all.objectives = all.objectives)
139 | class(out) <- "k-medoids"
140 | out
141 | }
142 |
143 |
144 | #' @title Hierarchical clustering for big SNP data sets.
145 | #'
146 | #' @description A three step approximated hierarchical clustering of SNPs suitable to
147 | #' large data sets.
148 | #'
149 | #' @param snp an object of class \linkS4class{snpMatrix}.
150 | #' @param k a positive integer specifying the number of clusters, less than the number of
151 | #' observations.
152 | #' @param identicals logical, if zero clustering.
153 | #' @param maxNeigbours a positive integer, specifying the maximum number of randomized
154 | #' searches.
155 | #' @param nLocal a positive integer, specifying the number of optimisation runs. Columns
156 | #' have to be similar to \code{snp}.
157 | #' @param method see hclust.
158 | #' @param mc.cores a number of cores for parallel computing. See \code{mclapply} in package
159 | #' parallel for details.
160 | #' @param trace logical, if \code{TRUE} it prints current status of the program.
161 | #' @param ... additional argruments for \code{\link[fastcluster]{hclust}}
162 | #'
163 | #' @seealso clarans
164 | #'
165 | #' @examples
166 | #' # file containing example data for SNP data
167 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
168 | #' snp <- read.snpData(gfile, sep = ",")
169 | #'
170 | #' clust <- qtcatClust(snp)
171 | #'
172 | #' @importFrom fastcluster hclust
173 | #' @importFrom stats as.dendrogram
174 | #' @importFrom parallel mclapply
175 | #' @importFrom methods is
176 | #' @importFrom utils installed.packages
177 | #' @export
178 | qtcatClust <- function(snp, k, identicals = TRUE, maxNeigbours = 100, nLocal = 10,
179 | method = "complete", mc.cores = 1, trace = FALSE, ...) {
180 | stopifnot(is(snp, "snpMatrix"))
181 | if (identicals) {
182 | # identicals
183 | if (trace)
184 | cat("Step 1: Search for identicals is running\n")
185 | identicalFit <- identicals(snp, mc.cores)
186 | snp <- snp[, identicalFit$medoids]
187 | } else if (trace) {
188 | cat("Step 1: Search for identicals is switch off\n")
189 | }
190 | # CLARANS
191 | if (missing(k))
192 | k <- as.integer(ncol(snp) / 10000L)
193 | if (k >= 2L) {
194 | if (identicals && length(identicalFit$medoids) <= k * 2)
195 | stop("Number of medoids from pefect correlated clustering is < k * 2")
196 | if (trace)
197 | cat("Step 2: CLARANS is running, 'k' is:", k, "\n")
198 | clarFit <- clarans(snp, k, maxNeigbours, nLocal, mc.cores)
199 | if (trace)
200 | cat(" objectives:",
201 | format(clarFit$all.objectives, sort = TRUE, digits = 4), "\n")
202 | # if cluster < 2 add to a bigger cluster
203 | clust.inx <- seq_len(k)
204 | cluster.size <- rep(NA, k)
205 | for (i in clust.inx)
206 | cluster.size[i] <- sum(clarFit$clusters == i)
207 | smallclust <- which(cluster.size < 2)
208 | if (length(smallclust)) {
209 | clust.inx <- clust.inx[-smallclust]
210 | min.bigclust <- clust.inx[which.min(min(cluster.size[clust.inx]))]
211 | clarFit$clusters[clarFit$clusters %in% smallclust] <- min.bigclust
212 | }
213 | # HClust
214 | if (trace)
215 | cat("Step 3: HClust is running\n")
216 | hclust.sub <- function(i, snp, clarFit, method, ...) {
217 | inx.i <- which(clarFit$clusters == i)
218 | out <- as.dendrogram(hclust(distCor(snp[, inx.i]), method, ...))
219 | out
220 | }
221 | hclustFit <- mclapply(clust.inx, hclust.sub,
222 | snp, clarFit, method, ...,
223 | mc.cores = mc.cores)
224 | dendro <- do.call(merge, c(hclustFit, height = 1, adjust = "add.max"))
225 | } else {
226 | # HClust
227 | if (trace)
228 | cat("Step 2: CLARANS is switch off\nStep 3: HClust is running\n")
229 | dendro <- as.dendrogram(hclust(distCor(snp), method, ...))
230 | }
231 | if (identicals) {
232 | out <- list(dendrogram = dendro,
233 | clusters = identicalFit$clusters,
234 | medoids = identicalFit$medoids)
235 | } else {
236 | medos <- labels(dendro)
237 | clust <- 1:length(medos)
238 | names(clust) <- medos
239 | out <- list(dendrogram = dendro,
240 | clusters = clust,
241 | medoids = medos)
242 | }
243 | class(out) <- "qtcatClust"
244 | out
245 | }
246 |
247 |
248 | #' @title Cut a qtcatClust object
249 | #'
250 | #' @description Cut a qtcatClust object at an specific height.
251 | #'
252 | #' @param snp an object of class \linkS4class{snpMatrix}.
253 | #' @param snpClust an object of class \code{\link{qtcatClust}}.
254 | #' @param absCor a cutting height in absolute value of correlation.
255 | #'
256 | #' @examples
257 | #' # file containing example data for SNP data
258 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
259 | #' snp <- read.snpData(gfile, sep = ",")
260 | #' clust <- qtcatClust(snp)
261 | #'
262 | #' cclust <- cutClust(snp, clust, .5)
263 | #'
264 | #' @importFrom methods is
265 | #' @importFrom stats na.omit
266 | #' @export
267 | cutClust <- function(snp, snpClust, absCor = 1) {
268 | stopifnot(is(snp, "snpMatrix"))
269 | stopifnot(is(snpClust, "qtcatClust"))
270 | stopifnot(!missing(absCor))
271 | dend <- snpClust$dendrogram
272 | clust <- snpClust$clusters
273 | if ((1 - absCor) >= attr(dend, "height")) {
274 | stop("'absCor' outside of range")
275 | } else {
276 | cut.dend <- cut(dend, h = 1 - absCor)
277 | clust.member <- function(i, dlist, clust) {
278 | namesclust <- names(clust)[clust %in% unique(clust[labels(dlist[[i]])])]
279 | clusti <- rep(i, length(namesclust))
280 | names(clusti) <- namesclust
281 | return(clusti)
282 | }
283 | clust <- unlist(lapply(1:length(cut.dend$lower), clust.member, cut.dend$lower, clust))
284 | medo <- names(clust)[corMedoids(snp[, names(clust)]@snpData, clust)]
285 | dend <- rename.leafs(cut.dend$upper, medo)
286 | clust <- clust[na.omit(match(colnames(snp), names(clust)))]
287 | }
288 | out <- list(dendrogram = dend,
289 | clusters = clust,
290 | medoids = medo)
291 | class(out) <- "qtcatClust"
292 | out
293 | }
294 |
295 |
296 | #' @title Rename dendrogram leafs
297 | #'
298 | #' @description Rename dendrogram leafs.
299 | #'
300 | #' @param dend a dendrogram.
301 | #' @param labels a vector of new names.
302 | #'
303 | #' @importFrom stats dendrapply is.leaf
304 | #' @keywords internal
305 | rename.leafs <- function(dend, labels){
306 | dendlabel <- function(n) {
307 | if(is.leaf(n)) {
308 | i <<- i + 1L
309 | attr(n, "label") <- labels[i]
310 | }
311 | n
312 | }
313 | i <- 0L
314 | dendrapply(dend, dendlabel)
315 | }
316 |
--------------------------------------------------------------------------------
/R/snpData.R:
--------------------------------------------------------------------------------
1 | #' @title Read SNP tables as a snpMatrix object
2 | #'
3 | #' @description Reads a file in table format and as a \linkS4class{snpMatrix} object.
4 | #'
5 | #' @param file the name of the file which the data are to be read from. If it does not
6 | #' contain an absolute path, the file name is relative to the current working directory,
7 | #' \code{getwd()}. Tilde-expansion is performed where supported.
8 | #' @param sep a field separator character. Values on each line of the file are separated
9 | #' by this character.
10 | #' @param quote the set of quoting characters. To disable quoting altogether, use
11 | #' \code{quote = ""}.
12 | #' @param na.string a string which is interpreted as NA value.
13 | #' @param nrows a integer, the maximum number of rows to read.
14 | #'
15 | #' @examples
16 | #' # file containing example data for SNP data
17 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
18 | #' snp <- read.snpData(gfile, sep = ",")
19 | #'
20 | #' @importFrom methods new
21 | #' @export
22 | read.snpData <- function(file, sep = " ", quote = "\"",
23 | na.string = "NA", nrows = -1L) {
24 | if (!file.exists(file))
25 | stop("No such file or directory")
26 | file <- normalizePath(file)
27 | testRead <- strsplit(readLines(file, n = 2L), sep)
28 | if (sep != "")
29 | testRead <- lapply(testRead, function(x, sep) gsub(sep, '',x), sep = sep)
30 | if (length(testRead[[1L]]) <= 3L)
31 | stop("In line one the separator character 'sep' doesn't exist")
32 | if (length(testRead[[1L]]) != length(testRead[[2L]]))
33 | stop("Line one and two are of differnt length")
34 | if (!identical(unique(testRead[[1L]]), testRead[[1L]]))
35 | stop("Column names are not unique")
36 | firstCols <- tolower(substring(testRead[[1L]][1L:3L], 1L, 3L))
37 | if (identical(firstCols, c("nam", "chr", "pos"))) {
38 | rowNames <- TRUE
39 | snp1 <- testRead[[2L]][-1L:-3L]
40 | } else if (identical(firstCols[1L:2L], c("chr", "pos"))) {
41 | rowNames <- FALSE
42 | snp1 <- testRead[[2L]][-1L:-2L]
43 | } else {
44 | stop("Either the first three columns contain 'names', 'chr', and 'pos' or alternatively the first two columns contain 'chr', and 'pos'")
45 | }
46 | if (any(nchar(snp1) != 2L))
47 | stop("Every position in the SNP-matrix has to be specified by two characters, missing values are not allowed")
48 | if (na.string != "") {
49 | na.string <- as.character(na.string)
50 | na.string[is.na(na.string)] <- "NA"
51 | }
52 | temp <- read_snpData(file, sep, quote, rowNames, na.string, nrows)
53 | if (!length(temp$lociNames)) {
54 | lociNames <- paste0("loci", seq_len(ncol(temp$snpData)))
55 | } else {
56 | lociNames <- make.unique(temp$lociNames)
57 | }
58 | chr <- suppressWarnings(as.numeric(temp$chr))
59 | if (any(is.na(chr)))
60 | chr <- temp$chr
61 | out <- new("snpMatrix",
62 | snpData = temp$snpData,
63 | snpInfo = data.frame(chr = chr,
64 | pos = temp$pos,
65 | allele = t(temp$alleles),
66 | row.names = lociNames),
67 | dim = dim(temp$snpData),
68 | dimnames = list(temp$indivNames, lociNames))
69 | out
70 | }
71 |
72 |
73 | #' @title A snpMatrix constructor
74 | #'
75 | #' @description Constructs a \linkS4class{snpMatrix} object from the given data.
76 | #'
77 | #' @param x a matrix with individuals in rows and SNPs in columns.
78 | #' @param chr a vector with chromosoms at which SNPs are located.
79 | #' @param pos a vector with genomic positions at which SNPs are located.
80 | #' @param alleleCoding a coding scheme of \code{x} for hom (AA), het (AB), and hom (BB).
81 | #' @param allele.1 labels of allele one, for each SNP.
82 | #' @param allele.2 labels of allele two, for each SNP.
83 | #'
84 | #' @importFrom methods new
85 | #' @importFrom stats na.omit
86 | #' @export
87 | as.snpMatrix <- function(x, chr, pos, alleleCoding = c(-1, 0, 1),
88 | allele.1 = NULL, allele.2 = NULL) {
89 | if (!is.matrix(x))
90 | x <- as.matrix(x)
91 | if (is.null(colnames(x)))
92 | stop("Column names are missing for 'x'")
93 | if (ncol(x) < 2L)
94 | stop("'x' has less than two columns")
95 | if (missing(chr))
96 | stop("'chr' must be specified")
97 | if (missing(pos))
98 | stop("'pos' must be specified")
99 | if (!is.vector(alleleCoding))
100 | stop("'alleleCoding' is not a vector")
101 | x.allele <- na.omit(unique(c(x)))
102 | if (!all(c(x.allele %in% alleleCoding, alleleCoding %in% x.allele)))
103 | stop("'alleleCoding' do not match to 'x'")
104 | if (is.null(allele.1) || is.null(allele.2)) {
105 | allele.1 <- rep("A", ncol(x))
106 | allele.2 <- rep("B", ncol(x))
107 | }
108 | if (is.null(rownames(x))) {
109 | indiv.names <- paste0("indiv", seq_len(nrow(x)))
110 | } else {
111 | indiv.names <- rownames(x)
112 | }
113 | loci.names <- colnames(x)
114 | attr(x, 'dimnames') <- NULL
115 | nLabels <- length(alleleCoding)
116 | if (nLabels == 2L) {
117 | newLabels <- as.raw(c(1, 3))
118 | } else if (nLabels == 3L) {
119 | newLabels <- as.raw(c(1, 2, 3))
120 | }
121 | y <- matrix(raw(0), nrow(x), ncol(x))
122 | for (i in 1:nLabels)
123 | y[alleleCoding[i] == x] <- newLabels[i]
124 | out <- new("snpMatrix",
125 | snpData = y,
126 | snpInfo = data.frame(chr = chr,
127 | pos = pos,
128 | allele.1 = allele.1,
129 | allele.2 = allele.2,
130 | row.names = loci.names),
131 | dim = dim(y),
132 | dimnames = list(indiv.names, loci.names))
133 | out
134 | }
135 |
136 |
137 | #' @title Subsetting snpMatrix
138 | #'
139 | #' @description Subsetting an object of class \linkS4class{snpMatrix}.
140 | #'
141 | #' @param x an object of class \linkS4class{snpMatrix}.
142 | #' @param i a indices specifying elements to extract or replace. Indices are booleans,
143 | #' numeric or character vectors.
144 | #' @param j indices specifying elements to extract or replace. Indices are booleans,
145 | #' numeric or character vectors.
146 | #' @param ... not implemented.
147 | #' @param drop not implemented.
148 | #'
149 | #' @importFrom methods setMethod signature new
150 | #' @export
151 | setMethod("[", signature(x = "snpMatrix", i = "ANY", j = "ANY", drop = "missing"),
152 | function(x, i, j, ..., drop) {
153 | if (!missing(i)) {
154 | if (is.character(i)) {
155 | i <- match(i, rownames(x))
156 | }
157 | }
158 | if (!missing(j)) {
159 | if (is.character(j)) {
160 | j <- match(j, colnames(x))
161 | }
162 | }
163 | snpData <- x@snpData[i, j, drop = FALSE]
164 | out <- new("snpMatrix",
165 | snpData = snpData,
166 | snpInfo = x@snpInfo[j, ],
167 | dim = dim(snpData),
168 | dimnames = list(rownames(x)[i], colnames(x)[j]))
169 | out
170 | }
171 | )
172 |
173 |
174 | #' @title As matrix method for snpMatrix
175 | #'
176 | #' @description As matrix method for an object of class \linkS4class{snpMatrix}.
177 | #'
178 | #' @param x an object of class \linkS4class{snpMatrix}.
179 | #'
180 | #' @examples
181 | #' # file containing example data for SNP data
182 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
183 | #' snp <- read.snpData(gfile, sep = ",")
184 | #' snpmat <- as.matrix(snp)
185 | #'
186 | #' @importFrom methods setMethod signature
187 | #' @export
188 | setMethod("as.matrix", signature(x = "snpMatrix"),
189 | function(x) {
190 | out <- design(x@snpData)
191 | dimnames(out) <- dimnames(x)
192 | out
193 | }
194 | )
195 |
196 |
197 | #' @title Get position from snpMatrix
198 | #'
199 | #' @description Genetic position info from an object of class
200 | #' \linkS4class{snpMatrix}.
201 | #'
202 | #' @param object an object of class \linkS4class{snpMatrix}.
203 | #'
204 | #' @examples
205 | #' # file containing example data for SNP data
206 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
207 | #' snp <- read.snpData(gfile, sep = ",")
208 | #' info <- snpInfo(snp)
209 | #'
210 | #' @importFrom methods setMethod signature
211 | #' @export
212 | setMethod("snpInfo", signature(object = "snpMatrix"),
213 | function(object) {
214 | out <- object@snpInfo
215 | if (is.null(out)) {
216 | cat("No position information available")
217 | }
218 | out
219 | }
220 | )
221 |
222 |
223 | #' @title Allele frequency
224 | #'
225 | #' @description Allele frequency an object of class \linkS4class{snpMatrix}.
226 | #'
227 | #' @param x an object of class \linkS4class{snpMatrix}.
228 | #' @param maf logical, if true minor allele frequency (default), other ways allele
229 | #' frequency of 'allele.1'.
230 | #'
231 | #' @examples
232 | #' # file containing example data for SNP data
233 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
234 | #' snp <- read.snpData(gfile, sep = ",")
235 | #' af <- alleleFreq(snp)
236 | #'
237 | #' @importFrom methods setMethod signature
238 | #' @export
239 | setMethod("alleleFreq", signature(x = "snpMatrix"),
240 | function(x, maf = TRUE) {
241 | out <- afreq(x@snpData, maf)
242 | names(out) <- colnames(x)
243 | out
244 | }
245 | )
246 |
247 |
248 | #' @title Heterozygosity
249 | #'
250 | #' @description Heterozygosity an object of class \linkS4class{snpMatrix}.
251 | #'
252 | #' @param x an object of class \linkS4class{snpMatrix}.
253 | #' @param dim a integer for dimension. 1 (default) is for rows (individuals), 2 is for
254 | #' columns (SNPs).
255 | #'
256 | #' @examples
257 | #' # file containing example data for SNP data
258 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
259 | #' snp <- read.snpData(gfile, sep = ",")
260 | #' hf1 <- hetFreq(snp, 1)
261 | #' hf2 <- hetFreq(snp, 2)
262 | #'
263 | #' @importFrom methods setMethod signature
264 | #' @export
265 | setMethod("hetFreq", signature(x = "snpMatrix"),
266 | function(x, dim = 1) {
267 | if (dim != 1L && dim != 2L)
268 | stop("'dim' must be '1' or '2'")
269 | out <- hetfreq(x@snpData, dim)
270 | if (dim == 1L)
271 | names(out) <- rownames(x)
272 | else if (dim == 2L)
273 | names(out) <- colnames(x)
274 | return(out)
275 | }
276 | )
277 |
278 |
279 | #' @title NA frequency
280 | #'
281 | #' @description NA frequency in an object of class \linkS4class{snpMatrix}.
282 | #'
283 | #' @param x an object of class \linkS4class{snpMatrix}.
284 | #' @param dim a integer for dimension. 1 (default) is for rows (individuals), 2 is for
285 | #' columns (SNPs).
286 | #'
287 | #' @examples
288 | #' # file containing example data for SNP data
289 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
290 | #' snp <- read.snpData(gfile, sep = ",")
291 | #' na1 <- naFreq(snp, 1)
292 | #' na2 <- naFreq(snp, 2)
293 | #'
294 | #' @importFrom methods setMethod signature
295 | #' @export
296 | setMethod("naFreq", signature(x = "snpMatrix"),
297 | function(x, dim = 1) {
298 | if (dim != 1L && dim != 2L)
299 | stop("'dim' must be '1' or '2'")
300 | out <- nafreq(x@snpData, dim)
301 | if (dim == 1L)
302 | names(out) <- rownames(x)
303 | else if (dim == 2L)
304 | names(out) <- colnames(x)
305 | return(out)
306 | }
307 | )
308 |
--------------------------------------------------------------------------------
/R/snpImpute.R:
--------------------------------------------------------------------------------
1 | #' @title Impute allele in formation to SNPs with missing data
2 | #'
3 | #' @description Uses neighbor SNPs from the clustering hierarchy to impute alleles to
4 | #' positions with missing values.
5 | #'
6 | #' @param snp an object of class \linkS4class{snpMatrix}.
7 | #' @param snpClust an object of class \code{\link{qtcatClust}}.
8 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this
9 | #' point in the hierarchy is reached, imputing is done via allele frequencies.
10 | #' @param mc.cores a number of cores for parallelising. Theoretical maximum is
11 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}.
12 | #'
13 | #' @examples
14 | #' # file containing example data for SNP data
15 | #' gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
16 | #' snp1 <- read.snpData(gfile, sep = ",")
17 | #' # delete SNP information from Matrix, 33.33% NAs (-> 66.67% SNP info)
18 | #' snp2 <- snp1
19 | #' nainx <- sample(1:length(snp2@snpData), length(snp2@snpData) / 3)
20 | #' snp2@snpData[nainx] <- as.raw(0)
21 | #' # clustering
22 | #' snp2clust <- qtcatClust(snp2)
23 | #'
24 | #' # imputing
25 | #' snp3 <- imputeSnpMatrix(snp2, snp2clust)
26 | #' # comparison of the full and the imputed data set
27 | #' snpmat1 <- as.matrix(snp1)
28 | #' snpmat3 <- as.matrix(snp3)
29 | #' (1 - sum(abs(snpmat1- snpmat3)) / length(snpmat1)) * 100
30 | #'
31 | #' @importFrom hit as.hierarchy
32 | #' @export
33 | imputeSnpMatrix <- function(snp, snpClust, min.absCor = .1, mc.cores = 1) {
34 | stopifnot(is(snp, "snpMatrix"))
35 | stopifnot(is(snpClust, "qtcatClust"))
36 | snpnames <- colnames(snp)
37 | hier <- as.hierarchy(snpClust$dendrogram, names = snpnames)
38 | snp <- imputeMedoids(snp, snpClust, hier, min.absCor, mc.cores)
39 | # impute non medoid SNPs (if exist)
40 | nonMedo <- which(!(names(snpClust$clusters) %in% snpClust$medoids))
41 | if (length(nonMedo)) {
42 | flipAlleles <- as.numeric(alleleFreq(snp, FALSE) <= .5)
43 | snpList <- list()
44 | for (i in 1:ncol(snp)) {
45 | snpList[[i]] <- snp@snpData[, i]
46 | if (i %in% nonMedo) {
47 | m <- which(snpClust$medoids[snpClust$clusters[i]] == snpnames)
48 | js <- which(snpList[[i]] == is.raw(0))
49 | jAllele <- snp@snpData[js, m]
50 | if (flipAlleles[i] != flipAlleles[m]) {
51 | j1 <- which(jAllele == as.raw(1))
52 | j3 <- which(jAllele == as.raw(3))
53 | jAllele[j1] <- as.raw(3)
54 | jAllele[j3] <- as.raw(1)
55 | }
56 | snpList[[i]][js] <- jAllele
57 | }
58 | }
59 | snp@snpData <- do.call(cbind, snpList)
60 | }
61 | snp
62 | }
63 |
64 |
65 | #' @title Impute missing information at medoid SNPs
66 | #'
67 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute alleles to
68 | #' positions with missing values at medoid SNPs.
69 | #'
70 | #' @param snp an object of class \linkS4class{snpMatrix}.
71 | #' @param snpClust an object of class \code{\link{qtcatClust}}.
72 | #' @param hier an object of class hierarchy.
73 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this
74 | #' point in the hierarchy is reached, imputing is done via allele frequencies.
75 | #' @param mc.cores a number of cores for parallelising. Theoretical maximum is
76 | #' \code{'B'}. For details see \code{\link[parallel]{mclapply}}.
77 | #'
78 | #' @importFrom parallel mclapply
79 | #' @keywords internal
80 | imputeMedoids <- function(snp, snpClust, hier, min.absCor = .25, mc.cores = 1) {
81 | hierLeafs <- which(sapply(hier, function(x) is.null(attr(x, which = "subset"))))
82 | naSnps <- which(naFreq(snp, 2) > 0 & colnames(snp) %in% labels(hier))
83 | medoSnps <- names(snpClust$clusters) %in% snpClust$medoids
84 | flipAlleles <- as.numeric(alleleFreq(snp, FALSE) <= .5)
85 | # run thru all SNPs
86 | snpList <- mclapply(1:ncol(snp), imputeSnp,
87 | snp, hier, hierLeafs, snpClust$clusters, medoSnps,
88 | naSnps, flipAlleles, min.absCor,
89 | mc.cores = mc.cores)
90 | snp@snpData <- do.call(cbind, snpList)
91 | snp
92 | }
93 |
94 |
95 | #' @title Impute missing information at a medoid SNPs from a group of neighbors
96 | #'
97 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute as many as
98 | #' possible alleles to positions with missing values at medoid SNPs.
99 | #'
100 | #' @param inxSnpOfInt a vertor of the snp of interest.
101 | #' @param snp an object of class \linkS4class{snpMatrix}.
102 | #' @param hier an object of class hierarchy.
103 | #' @param hierLeafs a vector of leafs of the hierarchy.
104 | #' @param clust a named vector of clusters.
105 | #' @param medoSnps a vector of medo turue o false.
106 | #' @param naSnps a vector of NA indeces.
107 | #' @param flipAlleles a vertor of telling for each SNP if allele one has allele freq. > 0.5
108 | #' or not.
109 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this
110 | #' point in the hierarchy is reached, imputing is done via allele frequencies.
111 | #'
112 | #' @keywords internal
113 | imputeSnp <- function(inxSnpOfInt, snp, hier, hierLeafs, clust, medoSnps, naSnps,
114 | flipAlleles, min.absCor) {
115 | snpOfInt <- snp@snpData[, inxSnpOfInt]
116 | if (medoSnps[inxSnpOfInt] && (inxSnpOfInt %in% naSnps)) {
117 | unsolved <- TRUE
118 | inxSnpsNotComp <- inxSnpOfInt
119 | # check in clusters of identicals
120 | inxSnpGrp <- which(clust == clust[inxSnpOfInt])
121 | inxSnpsNotComp <- inxSnpGrp[!(inxSnpGrp %in% inxSnpsNotComp)]
122 | if (length(inxSnpsNotComp)) {
123 | temp <- imputeSnpIter(snp, snpOfInt, inxSnpsNotComp,
124 | flipAlleles[inxSnpOfInt], flipAlleles)
125 | snpOfInt <- temp[[1L]]
126 | unsolved <- temp[[2L]]
127 | }
128 | if (unsolved) {
129 | # run thru the heirarchy until NAs of the SNP are filled with information or the
130 | # height threshold is reached
131 | hierSnpOfInt <- hierLeafs[sapply(hier[hierLeafs], function(x) any(x == inxSnpOfInt))]
132 | super <- attr(hier[[hierSnpOfInt]], "superset")
133 | inxSnpGrp <- hier[[super]]
134 | h <- attr(inxSnpGrp, "height")
135 | while (unsolved && h <= (1 - min.absCor)) {
136 | inxSnpsNotComp <- c(inxSnpsNotComp, inxSnpsNotComp)
137 | inxSnpsNotComp <- inxSnpGrp[!(inxSnpGrp %in% inxSnpsNotComp)]
138 | if (length(inxSnpsNotComp)) {
139 | temp <- imputeSnpIter(snp, snpOfInt, inxSnpsNotComp,
140 | flipAlleles[inxSnpOfInt], flipAlleles)
141 | snpOfInt <- temp[[1L]]
142 | unsolved <- temp[[2L]]
143 | }
144 | super <- attr(hier[[super]], "superset")
145 | if (is.null(super))
146 | break
147 | inxSnpGrp <- hier[[super]]
148 | h <- attr(inxSnpGrp, "height")
149 | }
150 | }
151 | # if height threshold is reached use alle frequency for random imputing
152 | if (unsolved) {
153 | js <- which(snpOfInt == as.raw(0L))
154 | alleleNo <- table(as.integer(snpOfInt), exclude = 0L)
155 | alleles <- as.raw(names(alleleNo))
156 | prob <- alleleNo / sum(alleleNo)
157 | snpOfInt[js] <- sample(alleles, length(js), TRUE, prob)
158 | }
159 | }
160 | snpOfInt
161 | }
162 |
163 |
164 | #' @title Impute missing information at a medoid SNPs from a group of neighbors
165 | #'
166 | #' @description Uses neighboring SNPs in the clustering hierarchy to impute as many as
167 | #' possible alleles to positions with missing values at medoid SNPs.
168 | #'
169 | #' @param snp an object of class \linkS4class{snpMatrix}.
170 | #' @param snpOfInt a vertor of the snp of interest.
171 | #' @param inxSnpsToComp a index of neighbors.
172 | #' @param snpOfIntFlip flip status of the snp of interest.
173 | #' @param flipAlleles a vertor of telling for each SNP if allele one has allele freq. > 0.5
174 | #' or not.
175 | #' @param min.absCor a minimum value of correlation. If missing values still exist if this
176 | #' point in the hierarchy is reached, imputing is done via allele frequencies.
177 | #'
178 | #' @keywords internal
179 | imputeSnpIter <- function(snp, snpOfInt, inxSnpsToComp, snpOfIntFlip, flipAlleles) {
180 | unsolved <- TRUE
181 | n <- length(inxSnpsToComp)
182 | i <- 1L
183 | while (unsolved && i <= n) {
184 | js <- which(snpOfInt == as.raw(0L))
185 | jAllele <- snp@snpData[js, inxSnpsToComp[i]]
186 | if (snpOfIntFlip != flipAlleles[inxSnpsToComp[i]]) {
187 | j1 <- which(jAllele == as.raw(1L))
188 | j3 <- which(jAllele == as.raw(3L))
189 | jAllele[j1] <- as.raw(3L)
190 | jAllele[j3] <- as.raw(1L)
191 | }
192 | snpOfInt[js] <- jAllele
193 | unsolved <- any(jAllele == as.raw(0L))
194 | i <- i + 1L
195 | }
196 | list(snpOfInt, unsolved)
197 | }
198 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 | --------------------------------------------------------------------------------
9 |
10 | # Quantitative Trait Cluster Association Test
11 | This project has no active development and is unmaintained.
12 |
13 |
14 | ## Description:
15 | All SNPs are jointly associated to the phenotype and at the same time correlation among
16 | them is considered. Thus, correction for population structure becomes unnecessary, which in
17 | many cases results in a power advantages compared to other methods.
18 |
19 | **Klasen, J. R. et al. (2016)**. *A multi-marker association method for genome-wide
20 | association studies without the need for population structure correction*. Nature
21 | Communications. [Paper](http://www.nature.com/articles/ncomms13299)
22 |
23 | ## Installation:
24 | The package can be installed from an R console via [`devtools`](https://github.com/hadley/devtools#updating-to-the-latest-version-of-devtools)
25 | (If you haven't yet installed `devtools` please do so first).
26 |
27 | ```R
28 | # install.packages("devtools")
29 | devtools::install_github("QTCAT/qtcat")
30 |
31 | ```
32 |
33 | ## Example:
34 | The `qtcatQtc`-function example gives an overview of the functionality of
35 | the package and can be accessed once the package is loaded.
36 |
37 | ```R
38 | library(qtcat)
39 | example(qtcatQtc, run.dontrun = TRUE)
40 |
41 | ```
42 | The data used in the example can be found under [inst/extdata](https://github.com/QTCAT/qtcat/tree/master/inst/extdata)
43 |
44 | There is also a [Arabidopsis example](https://github.com/QTCAT/qtcat.data) available.
45 |
46 | --------------------------------------------------------------------------------
47 |
48 | [](https://www.gnu.org/licenses/gpl-2.0.html)
49 | © 2016 JR Klasen
50 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | # DO NOT CHANGE the "init" and "install" sections below
2 |
3 | # Download script file from GitHub
4 | init:
5 | ps: |
6 | $ErrorActionPreference = "Stop"
7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
8 | Import-Module '..\appveyor-tool.ps1'
9 |
10 | install:
11 | ps: Bootstrap
12 |
13 | # Adapt as necessary starting from here
14 |
15 | environment:
16 | global:
17 | WARNINGS_ARE_ERRORS: 1
18 |
19 | matrix:
20 | - R_VERSION: release
21 | R_ARCH: x64
22 |
23 | - R_VERSION: patched
24 |
25 | build_script:
26 | - travis-tool.sh install_deps
27 | - travis-tool.sh install_github QTCAT/hit
28 |
29 | test_script:
30 | - travis-tool.sh run_tests
31 |
32 | on_failure:
33 | - 7z a failure.zip *.Rcheck\*
34 | - appveyor PushArtifact failure.zip
35 |
36 | artifacts:
37 | - path: '*.Rcheck\**\*.log'
38 | name: Logs
39 |
40 | - path: '*.Rcheck\**\*.out'
41 | name: Logs
42 |
43 | - path: '*.Rcheck\**\*.fail'
44 | name: Logs
45 |
46 | - path: '*.Rcheck\**\*.Rout'
47 | name: Logs
48 |
49 | - path: '\*_*.tar.gz'
50 | name: Bits
51 |
52 | - path: '\*_*.zip'
53 | name: Bits
54 |
--------------------------------------------------------------------------------
/inst/extdata/phenodata.csv:
--------------------------------------------------------------------------------
1 | name,phenotype,replication
2 | Indiv1,-0.712864882314292,I
3 | Indiv2,4.05700145305166,I
4 | Indiv3,2.68457752825909,I
5 | Indiv4,-0.978722003892421,I
6 | Indiv5,2.11833460559996,I
7 | Indiv6,4.69596379863478,I
8 | Indiv7,1.81492380500098,I
9 | Indiv8,8.33973545250998,I
10 | Indiv9,3.21727105571782,I
11 | Indiv10,1.75615681901381,I
12 | Indiv11,2.20340927227642,I
13 | Indiv12,3.29008788696637,I
14 | Indiv13,5.22242372780827,I
15 | Indiv14,3.9616977925181,I
16 | Indiv15,6.4184630775582,I
17 | Indiv16,2.52617864225462,I
18 | Indiv17,2.71675134747193,I
19 | Indiv18,9.08630499778771,I
20 | Indiv19,3.01776165508853,I
21 | Indiv20,2.12967794232666,I
22 | Indiv21,6.19881599227966,I
23 | Indiv22,2.60912725791431,I
24 | Indiv23,6.74137903134634,I
25 | Indiv24,5.43854561008488,I
26 | Indiv25,5.63126674897179,I
27 | Indiv26,5.3700172742071,I
28 | Indiv27,2.97767963958376,I
29 | Indiv28,5.02236976056541,I
30 | Indiv29,8.73017558942648,I
31 | Indiv30,7.67488336483054,I
32 | Indiv31,4.41931825555875,I
33 | Indiv32,5.76329762287536,I
34 | Indiv33,5.69297223264722,I
35 | Indiv34,4.43795994668944,I
36 | Indiv35,0.546417450296849,I
37 | Indiv36,5.14011399361257,I
38 | Indiv37,5.06931125177772,I
39 | Indiv38,2.6091396053827,I
40 | Indiv39,5.27906736993309,I
41 | Indiv40,7.59780299611099,I
42 | Indiv41,3.6971545640507,I
43 | Indiv42,4.30324604452561,I
44 | Indiv43,3.59785828183773,I
45 | Indiv44,5.09832009873271,I
46 | Indiv45,3.76780907857732,I
47 | Indiv46,0.844484552582176,I
48 | Indiv47,4.74916534660669,I
49 | Indiv48,5.67195293439532,I
50 | Indiv49,2.93964234735589,I
51 | Indiv50,4.90681722956387,I
52 | Indiv51,3.74911685226859,I
53 | Indiv52,1.92666742002455,I
54 | Indiv53,4.60404498058726,I
55 | Indiv54,3.53578869920731,I
56 | Indiv55,4.36984245698348,I
57 | Indiv56,3.26681032690425,I
58 | Indiv57,4.18302568636384,I
59 | Indiv58,3.70971187197379,I
60 | Indiv59,2.27999270087654,I
61 | Indiv60,3.05181399805921,I
62 | Indiv61,2.43549816911518,I
63 | Indiv62,2.34800409902198,I
64 | Indiv63,5.31821730546112,I
65 | Indiv64,-0.713122051020195,I
66 | Indiv65,4.50338394253097,I
67 | Indiv66,4.62922637029452,I
68 | Indiv67,2.24256710689108,I
69 | Indiv68,3.59909039731827,I
70 | Indiv69,6.20966957535184,I
71 | Indiv70,4.00384637556956,I
72 | Indiv71,2.6824335168794,I
73 | Indiv72,6.83166810179122,I
74 | Indiv73,2.49389386433214,I
75 | Indiv74,2.96109283271708,I
76 | Indiv75,5.90123212589518,I
77 | Indiv76,4.42447675875507,I
78 | Indiv77,4.96833384873987,I
79 | Indiv78,7.12738639539622,I
80 | Indiv79,4.42632757597075,I
81 | Indiv80,5.27826128836297,I
82 | Indiv81,4.2832569668242,I
83 | Indiv82,2.35271359835162,I
84 | Indiv83,6.75091008120157,I
85 | Indiv84,3.39601817943395,I
86 | Indiv85,0.792062124318047,I
87 | Indiv86,4.49112716920309,I
88 | Indiv87,5.48080731018814,I
89 | Indiv88,5.53831911353581,I
90 | Indiv89,3.8073404958687,I
91 | Indiv90,8.11792267567341,I
92 | Indiv91,5.24450049724668,I
93 | Indiv92,8.06636067197744,I
94 | Indiv93,3.76240790598956,I
95 | Indiv94,4.57106970441781,I
96 | Indiv95,0.930562501758845,I
97 | Indiv96,5.89207888898974,I
98 | Indiv97,4.76510770162231,I
99 | Indiv98,6.13315220689909,I
100 | Indiv99,5.15975639021241,I
101 | Indiv100,4.97858970802809,I
102 | Indiv1,0.176206906929316,II
103 | Indiv2,3.85129791007974,II
104 | Indiv3,0.89679511088741,II
105 | Indiv4,3.50501419315986,II
106 | Indiv5,2.74144800827057,II
107 | Indiv6,2.89870296149524,II
108 | Indiv7,2.83813356575242,II
109 | Indiv8,6.69393926686158,II
110 | Indiv9,0.445190879937969,II
111 | Indiv10,1.81268165056905,II
112 | Indiv11,4.18471268209546,II
113 | Indiv12,3.54037632849923,II
114 | Indiv13,7.54041530100953,II
115 | Indiv14,5.86490355924926,II
116 | Indiv15,0.541745439242708,II
117 | Indiv16,2.69319603761256,II
118 | Indiv17,4.212108002269,II
119 | Indiv18,5.48944504498334,II
120 | Indiv19,4.55532775386045,II
121 | Indiv20,4.42315823029467,II
122 | Indiv21,4.20476815196306,II
123 | Indiv22,7.19077440353425,II
124 | Indiv23,7.85927666811885,II
125 | Indiv24,4.7649811402712,II
126 | Indiv25,5.33833106226095,II
127 | Indiv26,9.51369408166689,II
128 | Indiv27,2.92203513544632,II
129 | Indiv28,6.32725491154529,II
130 | Indiv29,6.95849521528492,II
131 | Indiv30,4.49056269636047,II
132 | Indiv31,4.64224668507529,II
133 | Indiv32,6.49903099179706,II
134 | Indiv33,2.76900451424559,II
135 | Indiv34,5.10198462028238,II
136 | Indiv35,1.33235508902282,II
137 | Indiv36,3.99616006339745,II
138 | Indiv37,6.9482881756113,II
139 | Indiv38,2.21201425259469,II
140 | Indiv39,3.30168447239249,II
141 | Indiv40,5.70770160656621,II
142 | Indiv41,5.51655628789299,II
143 | Indiv42,3.22099848037914,II
144 | Indiv43,6.83821240540428,II
145 | Indiv44,1.05611152692071,II
146 | Indiv45,2.3793891845802,II
147 | Indiv46,3.486717253706,II
148 | Indiv47,4.45135169448376,II
149 | Indiv48,1.70633725047442,II
150 | Indiv49,7.78933321655033,II
151 | Indiv50,5.93711762234058,II
152 | Indiv51,2.63282544384585,II
153 | Indiv52,5.12336178663091,II
154 | Indiv53,3.13670189353915,II
155 | Indiv54,0.575601823259657,II
156 | Indiv55,5.3577809898076,II
157 | Indiv56,3.64994372110131,II
158 | Indiv57,1.95200830822741,II
159 | Indiv58,2.20290632954696,II
160 | Indiv59,3.98471425274528,II
161 | Indiv60,1.46737912491768,II
162 | Indiv61,2.92074054315639,II
163 | Indiv62,3.92929791664308,II
164 | Indiv63,2.19530471281698,II
165 | Indiv64,1.13804915005779,II
166 | Indiv65,4.15958191761742,II
167 | Indiv66,3.99613481122336,II
168 | Indiv67,1.52917842389308,II
169 | Indiv68,3.82975810221317,II
170 | Indiv69,2.06278495341624,II
171 | Indiv70,5.73101019063431,II
172 | Indiv71,5.43947438490607,II
173 | Indiv72,3.03603922924573,II
174 | Indiv73,2.288226558559,II
175 | Indiv74,2.09090076017277,II
176 | Indiv75,5.00102930990027,II
177 | Indiv76,5.97477767911573,II
178 | Indiv77,5.70601446968208,II
179 | Indiv78,5.02323274457706,II
180 | Indiv79,5.44894123457085,II
181 | Indiv80,5.43591657545378,II
182 | Indiv81,2.51784481532873,II
183 | Indiv82,3.10671706906678,II
184 | Indiv83,8.72487282279824,II
185 | Indiv84,1.27498342463781,II
186 | Indiv85,3.7876125919506,II
187 | Indiv86,5.12474566303836,II
188 | Indiv87,5.2718274186796,II
189 | Indiv88,7.85916592955344,II
190 | Indiv89,4.91608517979929,II
191 | Indiv90,5.01900455357883,II
192 | Indiv91,4.07100812407142,II
193 | Indiv92,6.6107211538395,II
194 | Indiv93,3.51727716642754,II
195 | Indiv94,4.1753329858646,II
196 | Indiv95,3.0353098664466,II
197 | Indiv96,3.32270339015034,II
198 | Indiv97,6.26606071672542,II
199 | Indiv98,7.52163666481343,II
200 | Indiv99,3.20389084994409,II
201 | Indiv100,5.63788046229403,II
202 | Indiv1,2.02029439261033,III
203 | Indiv2,1.3310022466908,III
204 | Indiv3,1.88109268308407,III
205 | Indiv4,2.0747606653117,III
206 | Indiv5,0.71136509236296,III
207 | Indiv6,5.69396750256412,III
208 | Indiv7,3.88933752587519,III
209 | Indiv8,5.40513402423043,III
210 | Indiv9,2.9082007446537,III
211 | Indiv10,5.01449395435981,III
212 | Indiv11,0.404922306483191,III
213 | Indiv12,5.16241985621497,III
214 | Indiv13,6.30167091059181,III
215 | Indiv14,3.02761398080251,III
216 | Indiv15,4.78362934229522,III
217 | Indiv16,2.62219124504208,III
218 | Indiv17,2.20120211280138,III
219 | Indiv18,7.81735976730744,III
220 | Indiv19,5.47309512628714,III
221 | Indiv20,0.326478522987693,III
222 | Indiv21,4.17417013688955,III
223 | Indiv22,4.6634139839633,III
224 | Indiv23,5.11293773267319,III
225 | Indiv24,3.83476050228082,III
226 | Indiv25,6.10600197974025,III
227 | Indiv26,4.88123830253315,III
228 | Indiv27,5.93398647240647,III
229 | Indiv28,6.79058601815249,III
230 | Indiv29,4.08974418429866,III
231 | Indiv30,6.36966316478642,III
232 | Indiv31,6.77187881311789,III
233 | Indiv32,4.2356086186249,III
234 | Indiv33,1.89829225573216,III
235 | Indiv34,8.16035874246949,III
236 | Indiv35,-0.56002492101901,III
237 | Indiv36,4.5986854641122,III
238 | Indiv37,6.90187522768069,III
239 | Indiv38,0.209604370609421,III
240 | Indiv39,4.77510809790155,III
241 | Indiv40,8.50368809488841,III
242 | Indiv41,3.08657316770405,III
243 | Indiv42,3.52485436085774,III
244 | Indiv43,5.98808046655028,III
245 | Indiv44,1.16351360622373,III
246 | Indiv45,7.69353448228304,III
247 | Indiv46,4.93097609274219,III
248 | Indiv47,3.15220320609129,III
249 | Indiv48,2.68712804101995,III
250 | Indiv49,7.71421329580518,III
251 | Indiv50,2.87368671700662,III
252 | Indiv51,3.94179831275021,III
253 | Indiv52,5.10124526695668,III
254 | Indiv53,2.92799161101486,III
255 | Indiv54,0.96951652051614,III
256 | Indiv55,6.14384764255529,III
257 | Indiv56,2.73228444251163,III
258 | Indiv57,4.41322465728952,III
259 | Indiv58,6.15032268108312,III
260 | Indiv59,0.877839915335282,III
261 | Indiv60,2.92317102138906,III
262 | Indiv61,1.94411888999608,III
263 | Indiv62,0.188646943059274,III
264 | Indiv63,3.99239953854629,III
265 | Indiv64,4.5826438337587,III
266 | Indiv65,2.52512157516696,III
267 | Indiv66,7.04756283086153,III
268 | Indiv67,3.35134198261861,III
269 | Indiv68,3.26565145307141,III
270 | Indiv69,4.13587004514256,III
271 | Indiv70,6.50598959703055,III
272 | Indiv71,1.21868886446736,III
273 | Indiv72,3.63896545676015,III
274 | Indiv73,4.93741312391645,III
275 | Indiv74,-1.87541241257004,III
276 | Indiv75,3.31873034768445,III
277 | Indiv76,4.52245145391965,III
278 | Indiv77,5.11374257847258,III
279 | Indiv78,5.48074339885391,III
280 | Indiv79,6.41669576635036,III
281 | Indiv80,2.89227943325531,III
282 | Indiv81,4.59381627240809,III
283 | Indiv82,5.33877112921947,III
284 | Indiv83,5.36755681458729,III
285 | Indiv84,2.69754684643688,III
286 | Indiv85,1.13013661082002,III
287 | Indiv86,5.16369038077365,III
288 | Indiv87,3.88016289688369,III
289 | Indiv88,7.60604371237198,III
290 | Indiv89,1.74340317670462,III
291 | Indiv90,7.43364608481622,III
292 | Indiv91,5.57022305368588,III
293 | Indiv92,4.62469210352858,III
294 | Indiv93,4.84210640577904,III
295 | Indiv94,3.54461123156264,III
296 | Indiv95,-0.0820107650207224,III
297 | Indiv96,5.4477657365215,III
298 | Indiv97,7.78709432577761,III
299 | Indiv98,5.84336889734014,III
300 | Indiv99,4.62500167547687,III
301 | Indiv100,4.9071448387676,III
302 |
--------------------------------------------------------------------------------
/man/alleleFreq-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpData.R
3 | \docType{methods}
4 | \name{alleleFreq,snpMatrix-method}
5 | \alias{alleleFreq,snpMatrix-method}
6 | \title{Allele frequency}
7 | \usage{
8 | \S4method{alleleFreq}{snpMatrix}(x, maf = TRUE)
9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 |
13 | \item{maf}{logical, if true minor allele frequency (default), other ways allele
14 | frequency of 'allele.1'.}
15 | }
16 | \description{
17 | Allele frequency an object of class \linkS4class{snpMatrix}.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | af <- alleleFreq(snp)
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/man/alleleFreq.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGeneric.R
3 | \name{alleleFreq}
4 | \alias{alleleFreq}
5 | \title{Allele Frequency.}
6 | \usage{
7 | alleleFreq(x, maf = TRUE)
8 | }
9 | \arguments{
10 | \item{x}{an object, for which a corresponding method exists.}
11 |
12 | \item{maf}{logical, if true minor allele frequency (default), other ways allele
13 | frequency of 'allele.1'.}
14 | }
15 | \description{
16 | Frequency of alleles in data set.
17 | }
18 |
--------------------------------------------------------------------------------
/man/as.matrix-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpData.R
3 | \docType{methods}
4 | \name{as.matrix,snpMatrix-method}
5 | \alias{as.matrix,snpMatrix-method}
6 | \title{As matrix method for snpMatrix}
7 | \usage{
8 | \S4method{as.matrix}{snpMatrix}(x)
9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 | }
13 | \description{
14 | As matrix method for an object of class \linkS4class{snpMatrix}.
15 | }
16 | \examples{
17 | # file containing example data for SNP data
18 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
19 | snp <- read.snpData(gfile, sep = ",")
20 | snpmat <- as.matrix(snp)
21 |
22 | }
23 |
--------------------------------------------------------------------------------
/man/as.snpMatrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpData.R
3 | \name{as.snpMatrix}
4 | \alias{as.snpMatrix}
5 | \title{A snpMatrix constructor}
6 | \usage{
7 | as.snpMatrix(x, chr, pos, alleleCoding = c(-1, 0, 1), allele.1 = NULL,
8 | allele.2 = NULL)
9 | }
10 | \arguments{
11 | \item{x}{a matrix with individuals in rows and SNPs in columns.}
12 |
13 | \item{chr}{a vector with chromosoms at which SNPs are located.}
14 |
15 | \item{pos}{a vector with genomic positions at which SNPs are located.}
16 |
17 | \item{alleleCoding}{a coding scheme of \code{x} for hom (AA), het (AB), and hom (BB).}
18 |
19 | \item{allele.1}{labels of allele one, for each SNP.}
20 |
21 | \item{allele.2}{labels of allele two, for each SNP.}
22 | }
23 | \description{
24 | Constructs a \linkS4class{snpMatrix} object from the given data.
25 | }
26 |
--------------------------------------------------------------------------------
/man/clarans.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpCluster.R
3 | \name{clarans}
4 | \alias{clarans}
5 | \title{K-medoids clustering of SNPs using randomized search}
6 | \usage{
7 | clarans(snp, k, maxNeigbours = 100, nLocal = 10, mc.cores = 1)
8 | }
9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 |
12 | \item{k}{a positive integer specifying the number of clusters, has to be greater than
13 | one and less than the number of SNPs.}
14 |
15 | \item{maxNeigbours}{a positive integer specifying the maximum number of randomized
16 | searches.}
17 |
18 | \item{nLocal}{a positive integer specifying the number of optimisation runs.}
19 |
20 | \item{mc.cores}{a positive integer for the number of cores for parallel computing. See
21 | \code{\link[parallel]{mclapply}} for details.}
22 | }
23 | \description{
24 | Partitioning (clustering) into k clusters "around medoids" by randomized
25 | search. \code{1-abs(cor)} is used as distance between SNPs.
26 | }
27 | \details{
28 | The K-medoids clustering is implemented as clustering large applications based
29 | upon randomized search (CLARANS) algorithm (Ng and Han 2002). CLARANS is a modification
30 | of the partitioning around medoids (PAM) algorithm \code{\link[cluster]{pam}}. Where the
31 | PAM algorithm is estimating all distances between SNPs and the respective medoids,
32 | CLARANS is searching a random subset of the SNPs. This is independently repeated several
33 | times and the result which minimises the average distance the most is reported. This
34 | produces results close to those of the PAM algorithm (Ng and Han 2002), though the
35 | number of runs and the subset size have to be arbitrarily chosen by the user. The
36 | algorithm has two advantages: (i) the number of distance comparisons is dramatically
37 | reduced; and (ii) parallelizing is straightforward.
38 | }
39 | \examples{
40 | # file containing example data for SNP data
41 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
42 | snp <- read.snpData(gfile, sep = ",")
43 |
44 | clust <- clarans(snp, 3)
45 |
46 | }
47 | \references{
48 | Ng and J. Han (2002). CLARANS: A method for clustering objects for spatial
49 | data mining. \emph{IEEE Transactions on Knowledge and Data Engineering}.
50 | \url{http://dx.doi.org/10.1109/TKDE.2002.1033770}).
51 | }
52 |
--------------------------------------------------------------------------------
/man/cutClust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpCluster.R
3 | \name{cutClust}
4 | \alias{cutClust}
5 | \title{Cut a qtcatClust object}
6 | \usage{
7 | cutClust(snp, snpClust, absCor = 1)
8 | }
9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 |
12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.}
13 |
14 | \item{absCor}{a cutting height in absolute value of correlation.}
15 | }
16 | \description{
17 | Cut a qtcatClust object at an specific height.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | clust <- qtcatClust(snp)
24 |
25 | cclust <- cutClust(snp, clust, .5)
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/man/distCor.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpCluster.R
3 | \name{distCor}
4 | \alias{distCor}
5 | \title{Correlation based distance between SNPs}
6 | \usage{
7 | distCor(snp)
8 | }
9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 | }
12 | \description{
13 | This function computes a distance matrix. The distance is estimated
14 | as one minus the absolute value of the correlation coefficient \code{1 - abs(cor)}.
15 | }
16 | \details{
17 | See \code{\link[stats]{dist}} for details about the output object.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 |
24 | dist <- distCor(snp[, 1:10])
25 |
26 | }
27 | \seealso{
28 | \code{\link[stats]{dist}}
29 | }
30 |
--------------------------------------------------------------------------------
/man/hetFreq-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpData.R
3 | \docType{methods}
4 | \name{hetFreq,snpMatrix-method}
5 | \alias{hetFreq,snpMatrix-method}
6 | \title{Heterozygosity}
7 | \usage{
8 | \S4method{hetFreq}{snpMatrix}(x, dim = 1)
9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 |
13 | \item{dim}{a integer for dimension. 1 (default) is for rows (individuals), 2 is for
14 | columns (SNPs).}
15 | }
16 | \description{
17 | Heterozygosity an object of class \linkS4class{snpMatrix}.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | hf1 <- hetFreq(snp, 1)
24 | hf2 <- hetFreq(snp, 2)
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/man/hetFreq.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGeneric.R
3 | \name{hetFreq}
4 | \alias{hetFreq}
5 | \title{Heterozygosity Frequency.}
6 | \usage{
7 | hetFreq(x, dim = 1L)
8 | }
9 | \arguments{
10 | \item{x}{an object, for which a corresponding method exists.}
11 |
12 | \item{dim}{dimension over which heterozygosity is estimated. 1 (default) is for rows
13 | (individuals), 2 is for columns (SNPs).}
14 | }
15 | \description{
16 | Frequency of heterozygosity in data set.
17 | }
18 |
--------------------------------------------------------------------------------
/man/identicals.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpCluster.R
3 | \name{identicals}
4 | \alias{identicals}
5 | \title{Perfect simiarity clusters of SNP}
6 | \usage{
7 | identicals(snp, mc.cores = 1)
8 | }
9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 |
12 | \item{mc.cores}{a positive integer for the number of cores for parallel computing. See
13 | \code{\link[parallel]{mclapply}} for details.}
14 | }
15 | \description{
16 | Finds perfect similarity cluster of SNPs. This is specially usfull in
17 | artificial crossing populations.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 |
24 | ident <- identicals(snp)
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/man/imputeMedoids.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpImpute.R
3 | \name{imputeMedoids}
4 | \alias{imputeMedoids}
5 | \title{Impute missing information at medoid SNPs}
6 | \usage{
7 | imputeMedoids(snp, snpClust, hier, min.absCor = 0.25, mc.cores = 1)
8 | }
9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 |
12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.}
13 |
14 | \item{hier}{an object of class hierarchy.}
15 |
16 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this
17 | point in the hierarchy is reached, imputing is done via allele frequencies.}
18 |
19 | \item{mc.cores}{a number of cores for parallelising. Theoretical maximum is
20 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.}
21 | }
22 | \description{
23 | Uses neighboring SNPs in the clustering hierarchy to impute alleles to
24 | positions with missing values at medoid SNPs.
25 | }
26 | \keyword{internal}
27 |
--------------------------------------------------------------------------------
/man/imputeSnp.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpImpute.R
3 | \name{imputeSnp}
4 | \alias{imputeSnp}
5 | \title{Impute missing information at a medoid SNPs from a group of neighbors}
6 | \usage{
7 | imputeSnp(inxSnpOfInt, snp, hier, hierLeafs, clust, medoSnps, naSnps,
8 | flipAlleles, min.absCor)
9 | }
10 | \arguments{
11 | \item{inxSnpOfInt}{a vertor of the snp of interest.}
12 |
13 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
14 |
15 | \item{hier}{an object of class hierarchy.}
16 |
17 | \item{hierLeafs}{a vector of leafs of the hierarchy.}
18 |
19 | \item{clust}{a named vector of clusters.}
20 |
21 | \item{medoSnps}{a vector of medo turue o false.}
22 |
23 | \item{naSnps}{a vector of NA indeces.}
24 |
25 | \item{flipAlleles}{a vertor of telling for each SNP if allele one has allele freq. > 0.5
26 | or not.}
27 |
28 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this
29 | point in the hierarchy is reached, imputing is done via allele frequencies.}
30 | }
31 | \description{
32 | Uses neighboring SNPs in the clustering hierarchy to impute as many as
33 | possible alleles to positions with missing values at medoid SNPs.
34 | }
35 | \keyword{internal}
36 |
--------------------------------------------------------------------------------
/man/imputeSnpIter.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpImpute.R
3 | \name{imputeSnpIter}
4 | \alias{imputeSnpIter}
5 | \title{Impute missing information at a medoid SNPs from a group of neighbors}
6 | \usage{
7 | imputeSnpIter(snp, snpOfInt, inxSnpsToComp, snpOfIntFlip, flipAlleles)
8 | }
9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 |
12 | \item{snpOfInt}{a vertor of the snp of interest.}
13 |
14 | \item{inxSnpsToComp}{a index of neighbors.}
15 |
16 | \item{snpOfIntFlip}{flip status of the snp of interest.}
17 |
18 | \item{flipAlleles}{a vertor of telling for each SNP if allele one has allele freq. > 0.5
19 | or not.}
20 |
21 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this
22 | point in the hierarchy is reached, imputing is done via allele frequencies.}
23 | }
24 | \description{
25 | Uses neighboring SNPs in the clustering hierarchy to impute as many as
26 | possible alleles to positions with missing values at medoid SNPs.
27 | }
28 | \keyword{internal}
29 |
--------------------------------------------------------------------------------
/man/imputeSnpMatrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpImpute.R
3 | \name{imputeSnpMatrix}
4 | \alias{imputeSnpMatrix}
5 | \title{Impute allele in formation to SNPs with missing data}
6 | \usage{
7 | imputeSnpMatrix(snp, snpClust, min.absCor = 0.1, mc.cores = 1)
8 | }
9 | \arguments{
10 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
11 |
12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.}
13 |
14 | \item{min.absCor}{a minimum value of correlation. If missing values still exist if this
15 | point in the hierarchy is reached, imputing is done via allele frequencies.}
16 |
17 | \item{mc.cores}{a number of cores for parallelising. Theoretical maximum is
18 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.}
19 | }
20 | \description{
21 | Uses neighbor SNPs from the clustering hierarchy to impute alleles to
22 | positions with missing values.
23 | }
24 | \examples{
25 | # file containing example data for SNP data
26 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
27 | snp1 <- read.snpData(gfile, sep = ",")
28 | # delete SNP information from Matrix, 33.33\% NAs (-> 66.67\% SNP info)
29 | snp2 <- snp1
30 | nainx <- sample(1:length(snp2@snpData), length(snp2@snpData) / 3)
31 | snp2@snpData[nainx] <- as.raw(0)
32 | # clustering
33 | snp2clust <- qtcatClust(snp2)
34 |
35 | # imputing
36 | snp3 <- imputeSnpMatrix(snp2, snp2clust)
37 | # comparison of the full and the imputed data set
38 | snpmat1 <- as.matrix(snp1)
39 | snpmat3 <- as.matrix(snp3)
40 | (1 - sum(abs(snpmat1- snpmat3)) / length(snpmat1)) * 100
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/man/lmQtc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \name{lmQtc}
4 | \alias{lmQtc}
5 | \title{Fitting a Linear Model to QTCs}
6 | \usage{
7 | lmQtc(object, pheno, geno, alpha = 0.05, min.absCor = 0.05)
8 | }
9 | \arguments{
10 | \item{object}{an object of class \code{\link{qtcatHit}}.}
11 |
12 | \item{pheno}{an object of class \code{\link{qtcatPheno}}.}
13 |
14 | \item{geno}{an object of class \code{\link{qtcatGeno}}.}
15 |
16 | \item{alpha}{an alpha level for significance estimation.}
17 |
18 | \item{min.absCor}{minimum absolute value of correlation considered.}
19 | }
20 | \description{
21 | Linear model between phenotype and medoids of QTCs (significant SNP
22 | clusters).
23 | }
24 | \examples{
25 | # If you want to run the examples, use:
26 | # example(lmQtc, run.dontrun = TRUE)
27 | \dontrun{
28 | # files containing example data for SNP data and the phenotype
29 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
31 | pdat <- read.csv(pfile, header = TRUE)
32 | snp <- read.snpData(gfile, sep = ",")
33 | clust <- qtcatClust(snp)
34 | geno <- qtcatGeno(snp, clust)
35 | pheno <- qtcatPheno(names = pdat[, 1],
36 | pheno = pdat[, 2],
37 | covariates = model.matrix(~ pdat[, 3]))
38 | fitted <- qtcatHit(pheno, geno)
39 |
40 | # fitting a LM to the phenotype and QTC medoids
41 | lmfitted <- lmQtc(fitted, pheno, geno)
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/man/medoQtc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \name{medoQtc}
4 | \alias{medoQtc}
5 | \title{Find medoids of QTCs}
6 | \usage{
7 | medoQtc(object, geno, alpha = 0.05, min.absCor = 0.05)
8 | }
9 | \arguments{
10 | \item{object}{an object of class \code{\link{qtcatHit}}.}
11 |
12 | \item{geno}{an object of class \code{\link{qtcatGeno}}.}
13 |
14 | \item{alpha}{an alpha level for significance estimation.}
15 |
16 | \item{min.absCor}{minimum absolute value of correlation considered.}
17 | }
18 | \description{
19 | Find a medoid of for each quantitative trait cluster (QTC).
20 | }
21 | \examples{
22 | # If you want to run the examples, use:
23 | # example(medoQtc, run.dontrun = TRUE)
24 | \dontrun{
25 | # files containing example data for SNP data and the phenotype
26 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
27 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
28 | pdat <- read.csv(pfile, header = TRUE)
29 | snp <- read.snpData(gfile, sep = ",")
30 | clust <- qtcatClust(snp)
31 | geno <- qtcatGeno(snp, clust)
32 | pheno <- qtcatPheno(names = pdat[, 1],
33 | pheno = pdat[, 2],
34 | covariates = model.matrix(~ pdat[, 3]))
35 | fitted <- qtcatHit(pheno, geno)
36 |
37 | # QTC medoids
38 | medo <- medoQtc(fitted, geno)
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/man/naFreq-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpData.R
3 | \docType{methods}
4 | \name{naFreq,snpMatrix-method}
5 | \alias{naFreq,snpMatrix-method}
6 | \title{NA frequency}
7 | \usage{
8 | \S4method{naFreq}{snpMatrix}(x, dim = 1)
9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 |
13 | \item{dim}{a integer for dimension. 1 (default) is for rows (individuals), 2 is for
14 | columns (SNPs).}
15 | }
16 | \description{
17 | NA frequency in an object of class \linkS4class{snpMatrix}.
18 | }
19 | \examples{
20 | # file containing example data for SNP data
21 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
22 | snp <- read.snpData(gfile, sep = ",")
23 | na1 <- naFreq(snp, 1)
24 | na2 <- naFreq(snp, 2)
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/man/naFreq.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGeneric.R
3 | \name{naFreq}
4 | \alias{naFreq}
5 | \title{Missing Data Frequency.}
6 | \usage{
7 | naFreq(x, dim = 1L)
8 | }
9 | \arguments{
10 | \item{x}{an object, for which a corresponding method exists.}
11 |
12 | \item{dim}{dimension over which heterozygosity is estimated. 1 (default) is for rows
13 | (individuals), 2 is for columns (SNPs).}
14 | }
15 | \description{
16 | Frequency of missing Data in data set.
17 | }
18 |
--------------------------------------------------------------------------------
/man/plotQtc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \name{plotQtc}
4 | \alias{plotQtc}
5 | \title{Plot resulting QTCs of the Hierarchical Inference Test}
6 | \usage{
7 | plotQtc(x, alpha = 0.05, xlab = "Chromosomes",
8 | ylab = expression(-log[10](italic(p))), col.axis = NULL, ...)
9 | }
10 | \arguments{
11 | \item{x}{an object of class \code{\link{qtcatHit}}.}
12 |
13 | \item{alpha}{an alpha level for significance estimation.}
14 |
15 | \item{xlab}{a title for the x axis.}
16 |
17 | \item{ylab}{a title for the y axis.}
18 |
19 | \item{col.axis}{colors for axis line, tick marks, and title respectively.}
20 |
21 | \item{...}{other graphical parameters may also be passed as arguments to this function.}
22 | }
23 | \description{
24 | Plot the QTCs (significant cluster of SNPs) at their
25 | position at the genome.
26 | }
27 | \examples{
28 | # If you want to run the examples, use:
29 | # example(plotQtc, run.dontrun = TRUE)
30 | \dontrun{
31 | # files containing example data for SNP data and the phenotype
32 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
33 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
34 | pdat <- read.csv(pfile, header = TRUE)
35 | snp <- read.snpData(gfile, sep = ",")
36 | clust <- qtcatClust(snp)
37 | geno <- qtcatGeno(snp, clust)
38 | pheno <- qtcatPheno(names = pdat[, 1],
39 | pheno = pdat[, 2],
40 | covariates = model.matrix(~ pdat[, 3]))
41 | fitted <- qtcatHit(pheno, geno)
42 |
43 | # Plot the QTCs (loci37, loci260, and loci367 are causal)
44 | plotQtc(fitted)
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/man/plotSelFreq.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \name{plotSelFreq}
4 | \alias{plotSelFreq}
5 | \title{Plot markers selection frequencies of the Hierarchical Inference Test}
6 | \usage{
7 | plotSelFreq(x, xlab = "Chromosomes", ylab = "Sel. freq.", col.axis = NULL,
8 | ...)
9 | }
10 | \arguments{
11 | \item{x}{an object of class \code{\link{qtcatHit}}.}
12 |
13 | \item{xlab}{a title for the x axis.}
14 |
15 | \item{ylab}{a title for the y axis.}
16 |
17 | \item{col.axis}{colors for axis line, tick marks, and title respectively.}
18 |
19 | \item{...}{other graphical parameters may also be passed as arguments to this function.}
20 | }
21 | \description{
22 | Plot markers selection frequencies at their
23 | position at the genome.
24 | }
25 | \examples{
26 | # If you want to run the examples, use:
27 | # example(plotSelFreq, run.dontrun = TRUE)
28 | \dontrun{
29 | # files containing example data for SNP data and the phenotype
30 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
31 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
32 | pdat <- read.csv(pfile, header = TRUE)
33 | snp <- read.snpData(gfile, sep = ",")
34 | clust <- qtcatClust(snp)
35 | geno <- qtcatGeno(snp, clust)
36 | pheno <- qtcatPheno(names = pdat[, 1],
37 | pheno = pdat[, 2],
38 | covariates = model.matrix(~ pdat[, 3]))
39 | fitted <- qtcatHit(pheno, geno)
40 |
41 | # Plot the selection frequncy of markers (loci37, loci260, and loci367 are causal)
42 | plotSelFreq(fitted)
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/man/qtcat-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/qtcatPackage.R
3 | \docType{package}
4 | \name{qtcat-package}
5 | \alias{qtcat-package}
6 | \alias{qtcat}
7 | \title{Quantitative Trait Cluster Association Test}
8 | \description{
9 | All SNPs are jointly associated to the phenotype and at the same time
10 | correlation among them is considered. Thus, correction for population structure becomes
11 | unnecessary, which in many cases results in a power advantages compared to other methods.
12 | }
13 | \author{
14 | Jonas Klasen
15 | }
16 |
--------------------------------------------------------------------------------
/man/qtcatClust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpCluster.R
3 | \name{qtcatClust}
4 | \alias{qtcatClust}
5 | \title{Hierarchical clustering for big SNP data sets.}
6 | \usage{
7 | qtcatClust(snp, k, identicals = TRUE, maxNeigbours = 100, nLocal = 10,
8 | method = "complete", mc.cores = 1, trace = FALSE, ...)
9 | }
10 | \arguments{
11 | \item{snp}{an object of class \linkS4class{snpMatrix}.}
12 |
13 | \item{k}{a positive integer specifying the number of clusters, less than the number of
14 | observations.}
15 |
16 | \item{identicals}{logical, if zero clustering.}
17 |
18 | \item{maxNeigbours}{a positive integer, specifying the maximum number of randomized
19 | searches.}
20 |
21 | \item{nLocal}{a positive integer, specifying the number of optimisation runs. Columns
22 | have to be similar to \code{snp}.}
23 |
24 | \item{method}{see hclust.}
25 |
26 | \item{mc.cores}{a number of cores for parallel computing. See \code{mclapply} in package
27 | parallel for details.}
28 |
29 | \item{trace}{logical, if \code{TRUE} it prints current status of the program.}
30 |
31 | \item{...}{additional argruments for \code{\link[fastcluster]{hclust}}}
32 | }
33 | \description{
34 | A three step approximated hierarchical clustering of SNPs suitable to
35 | large data sets.
36 | }
37 | \examples{
38 | # file containing example data for SNP data
39 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
40 | snp <- read.snpData(gfile, sep = ",")
41 |
42 | clust <- qtcatClust(snp)
43 |
44 | }
45 | \seealso{
46 | clarans
47 | }
48 |
--------------------------------------------------------------------------------
/man/qtcatGeno.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \name{qtcatGeno}
4 | \alias{qtcatGeno}
5 | \title{A genotype object constructor}
6 | \usage{
7 | qtcatGeno(snp, snpClust, absCor, min.absCor = 0.5, mc.cores = 1)
8 | }
9 | \arguments{
10 | \item{snp}{an object of S4 class \linkS4class{snpMatrix}.}
11 |
12 | \item{snpClust}{an object of class \code{\link{qtcatClust}}.}
13 |
14 | \item{absCor}{a vector of absolute value of correlations considered in the hierarchy.}
15 |
16 | \item{min.absCor}{a minimum absolute value of correlation which is considered. A value
17 | in the range from 0 to 1.}
18 |
19 | \item{mc.cores}{a number of cores for parallelising. The maximum is \code{'B'}. For
20 | details see \code{\link[parallel]{mclapply}}.}
21 | }
22 | \description{
23 | Constructs a S3-object containing a SNP design matrix and a hierarchy. If
24 | a SNP in the input object contains missing data, the clustering is used to impute
25 | information from highly correlated neighbor SNPs. The genotype object is needed for
26 | \code{\link{qtcatHit}} as input.
27 | }
28 | \examples{
29 | # file containing example data for SNP data
30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
31 | snp <- read.snpData(gfile, sep = ",")
32 | clust <- qtcatClust(snp)
33 |
34 | # Construct geotype object
35 | geno <- qtcatGeno(snp, clust)
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/man/qtcatHit.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \name{qtcatHit}
4 | \alias{qtcatHit}
5 | \title{Fitting Hierarchical Inference Testing}
6 | \usage{
7 | qtcatHit(pheno, geno, B = 50, p.samp1 = 0.35, nfolds = 5,
8 | overall.lambda = FALSE, lambda.opt = "lambda.1se", alpha = 1,
9 | gamma = seq(0.05, 0.99, by = 0.01), max.p.esti = 1, seed = 12321,
10 | mc.cores = 1, trace = FALSE, ...)
11 | }
12 | \arguments{
13 | \item{pheno}{an object of class \code{\link{qtcatPheno}}.}
14 |
15 | \item{geno}{an object of class \code{\link{qtcatGeno}}.}
16 |
17 | \item{B}{a integer indicating the number of sample-splits.}
18 |
19 | \item{p.samp1}{a value specifying the fraction of data used for the LASSO sample-split.
20 | The ANOVA sample-split is \code{1 - p.samp1}.}
21 |
22 | \item{nfolds}{Number of folds (default is 5). See \code{\link[glmnet]{cv.glmnet}} for
23 | more details.}
24 |
25 | \item{overall.lambda}{Logical, if true, lambda is estimated once, if false (default),
26 | lambda is estimated for each sample split.}
27 |
28 | \item{lambda.opt}{a criterion for optimum selection of cross validated lasso. Either
29 | "lambda.1se" (default) or "lambda.min". See
30 | \code{\link[glmnet]{cv.glmnet}} for more details.}
31 |
32 | \item{alpha}{a single value in the range of 0 to 1 for the elastic net mixing parameter.}
33 |
34 | \item{gamma}{a vector of gamma-values used in significance estimation.}
35 |
36 | \item{max.p.esti}{a maximum for computed p-values. All p-values above this value are set
37 | to one. Small \code{max.p.esti} values reduce computing time.}
38 |
39 | \item{seed}{a RNG seed, see \code{\link{set.seed}}.}
40 |
41 | \item{mc.cores}{a number of cores for parallelising. The maximum is
42 | \code{'B'}. For details see \code{\link[parallel]{mclapply}}.}
43 |
44 | \item{trace}{logical, if \code{TRUE} it prints the current status of the program.}
45 |
46 | \item{...}{additional arguments for \code{\link[glmnet]{cv.glmnet}}.}
47 | }
48 | \description{
49 | Hierarchical inference testing for phenotype-SNP association.
50 | }
51 | \examples{
52 | # If you want to run the examples, use:
53 | # example(qtcatHit, run.dontrun = TRUE)
54 | \dontrun{
55 | # files containing example data for SNP data and the phenotype
56 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
57 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
58 | pdat <- read.csv(pfile, header = TRUE)
59 | snp <- read.snpData(gfile, sep = ",")
60 | clust <- qtcatClust(snp)
61 | geno <- qtcatGeno(snp, clust)
62 | pheno <- qtcatPheno(names = pdat[, 1],
63 | pheno = pdat[, 2],
64 | covariates = model.matrix(~ pdat[, 3]))
65 |
66 | # fitting HIT
67 | fitted <- qtcatHit(pheno, geno)
68 | }
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/man/qtcatPheno.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \name{qtcatPheno}
4 | \alias{qtcatPheno}
5 | \title{A phenotype object constructor}
6 | \usage{
7 | qtcatPheno(names, pheno, family = "gaussian", covariates = NULL)
8 | }
9 | \arguments{
10 | \item{names}{a vector of individual names of length 'n'.}
11 |
12 | \item{pheno}{a vector of length 'n' or a matrix size 'n x 2' in case of binomial family.
13 | This contains the phenotypic observations.}
14 |
15 | \item{family}{a character string specifying the family of the phenotype distribution.
16 | Either "gaussian" (default) or "binomial".}
17 |
18 | \item{covariates}{a matrix typically generated by a call of
19 | \code{\link[stats]{model.matrix}}. It contain additional variables influencing the
20 | phenotype e.g. environmental and experimental covariates.}
21 | }
22 | \description{
23 | Constructs an S3-object containing phenotype and if additional covariats
24 | exist a design matrix of those. The phenotype object is needed as input for
25 | \code{\link{qtcatHit}}.
26 | }
27 | \examples{
28 | # file containing example data for a phenotype.
29 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
30 | pdat <- read.csv(pfile, header = TRUE)
31 |
32 | # Construct phenotype object
33 | pheno <- qtcatPheno(names = pdat[, 1],
34 | pheno = pdat[, 2],
35 | covariates = model.matrix(~ pdat[, 3]))
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/man/qtcatQtc.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \name{qtcatQtc}
4 | \alias{qtcatQtc}
5 | \title{Summarize results of Hierarchical Inference Test}
6 | \usage{
7 | qtcatQtc(object, alpha = 0.05, min.absCor = 0.05)
8 | }
9 | \arguments{
10 | \item{object}{an object of class \code{\link{qtcatHit}}.}
11 |
12 | \item{alpha}{an alpha level for significance estimation.}
13 |
14 | \item{min.absCor}{a minimum absolute value of correlation to be considered.}
15 | }
16 | \description{
17 | Summarizing the QTCs (significant cluster of SNPs) and their position at
18 | the genome.
19 | }
20 | \examples{
21 | # If you want to run the examples, use:
22 | # example(qtcatQtc, run.dontrun = TRUE)
23 | \dontrun{
24 | # files containing example data for SNP data and the phenotype
25 | pfile <- system.file("extdata/phenodata.csv", package = "qtcat")
26 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
27 | pdat <- read.csv(pfile, header = TRUE)
28 | snp <- read.snpData(gfile, sep = ",")
29 | clust <- qtcatClust(snp)
30 | geno <- qtcatGeno(snp, clust)
31 | pheno <- qtcatPheno(names = pdat[, 1],
32 | pheno = pdat[, 2],
33 | covariates = model.matrix(~ pdat[, 3]))
34 | fitted <- qtcatHit(pheno, geno)
35 |
36 | # Summarizing the QTCs (loci37, loci260, and loci367 are causal)
37 | qtcatQtc(fitted)
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/man/read.snpData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpData.R
3 | \name{read.snpData}
4 | \alias{read.snpData}
5 | \title{Read SNP tables as a snpMatrix object}
6 | \usage{
7 | read.snpData(file, sep = " ", quote = "\\"", na.string = "NA",
8 | nrows = -1L)
9 | }
10 | \arguments{
11 | \item{file}{the name of the file which the data are to be read from. If it does not
12 | contain an absolute path, the file name is relative to the current working directory,
13 | \code{getwd()}. Tilde-expansion is performed where supported.}
14 |
15 | \item{sep}{a field separator character. Values on each line of the file are separated
16 | by this character.}
17 |
18 | \item{quote}{the set of quoting characters. To disable quoting altogether, use
19 | \code{quote = ""}.}
20 |
21 | \item{na.string}{a string which is interpreted as NA value.}
22 |
23 | \item{nrows}{a integer, the maximum number of rows to read.}
24 | }
25 | \description{
26 | Reads a file in table format and as a \linkS4class{snpMatrix} object.
27 | }
28 | \examples{
29 | # file containing example data for SNP data
30 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
31 | snp <- read.snpData(gfile, sep = ",")
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/man/rename.leafs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpCluster.R
3 | \name{rename.leafs}
4 | \alias{rename.leafs}
5 | \title{Rename dendrogram leafs}
6 | \usage{
7 | rename.leafs(dend, labels)
8 | }
9 | \arguments{
10 | \item{dend}{a dendrogram.}
11 |
12 | \item{labels}{a vector of new names.}
13 | }
14 | \description{
15 | Rename dendrogram leafs.
16 | }
17 | \keyword{internal}
18 |
--------------------------------------------------------------------------------
/man/snpInfo-qtcatHit-method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/associationTest.R
3 | \docType{methods}
4 | \name{snpInfo,qtcatHit-method}
5 | \alias{snpInfo,qtcatHit-method}
6 | \title{Get position from qtcatHit object}
7 | \usage{
8 | \S4method{snpInfo}{qtcatHit}(object)
9 | }
10 | \arguments{
11 | \item{object}{an object of class \code{\link{qtcatHit}}.}
12 | }
13 | \description{
14 | Genetic position info from an object of class qtcatHit.
15 | }
16 |
--------------------------------------------------------------------------------
/man/snpInfo-snpMatrix-method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpData.R
3 | \docType{methods}
4 | \name{snpInfo,snpMatrix-method}
5 | \alias{snpInfo,snpMatrix-method}
6 | \title{Get position from snpMatrix}
7 | \usage{
8 | \S4method{snpInfo}{snpMatrix}(object)
9 | }
10 | \arguments{
11 | \item{object}{an object of class \linkS4class{snpMatrix}.}
12 | }
13 | \description{
14 | Genetic position info from an object of class
15 | \linkS4class{snpMatrix}.
16 | }
17 | \examples{
18 | # file containing example data for SNP data
19 | gfile <- system.file("extdata/snpdata.csv", package = "qtcat")
20 | snp <- read.snpData(gfile, sep = ",")
21 | info <- snpInfo(snp)
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/man/snpInfo.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllGeneric.R
3 | \name{snpInfo}
4 | \alias{snpInfo}
5 | \title{Extract genomic position and allele information.}
6 | \usage{
7 | snpInfo(object)
8 | }
9 | \arguments{
10 | \item{object}{an object, for which a corresponding method exists.}
11 | }
12 | \description{
13 | Extract genomic position and allele information from object.
14 | }
15 |
--------------------------------------------------------------------------------
/man/snpMatrix-class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/AllClass.R
3 | \docType{class}
4 | \name{snpMatrix-class}
5 | \alias{snpMatrix-class}
6 | \title{A S4 class to represent a SNP-matrix}
7 | \description{
8 | A S4 class to represent a SNP matrix. Storing SNP information, by using a
9 | byte-level (raw) storage scheme, jointly with genomic position and allele information.
10 | }
11 | \section{Slots}{
12 |
13 | \describe{
14 | \item{\code{snpData}}{a matrix of SNPs stored in type 'raw'. 00 is NA, 01 homozygote AA, 02
15 | heterozygote AB, and 03 homozygote BB.}
16 |
17 | \item{\code{snpInfo}}{data.frame with four columns. The first col. contains the chromosomes,
18 | the second col. the positions, the third col. the first allele and the fourth the second
19 | allele.}
20 |
21 | \item{\code{dim}}{an integer vector with exactly two non-negative values.}
22 |
23 | \item{\code{dimnames}}{a list of length two; each component containing NULL or a character vector
24 | length equal the corresponding dim element.}
25 | }}
26 |
27 |
--------------------------------------------------------------------------------
/man/sub-snpMatrix-ANY-ANY-missing-method.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/snpData.R
3 | \docType{methods}
4 | \name{[,snpMatrix,ANY,ANY,missing-method}
5 | \alias{[,snpMatrix,ANY,ANY,missing-method}
6 | \title{Subsetting snpMatrix}
7 | \usage{
8 | \S4method{[}{snpMatrix,ANY,ANY,missing}(x, i, j, ..., drop = TRUE)
9 | }
10 | \arguments{
11 | \item{x}{an object of class \linkS4class{snpMatrix}.}
12 |
13 | \item{i}{a indices specifying elements to extract or replace. Indices are booleans,
14 | numeric or character vectors.}
15 |
16 | \item{j}{indices specifying elements to extract or replace. Indices are booleans,
17 | numeric or character vectors.}
18 |
19 | \item{...}{not implemented.}
20 |
21 | \item{drop}{not implemented.}
22 | }
23 | \description{
24 | Subsetting an object of class \linkS4class{snpMatrix}.
25 | }
26 |
--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | #include
5 |
6 | using namespace Rcpp;
7 |
8 | // read_snpData
9 | Rcpp::List read_snpData(Rcpp::CharacterVector file, char sep, char quote, bool rowNames, Rcpp::CharacterVector na_str, int nrows);
10 | RcppExport SEXP _qtcat_read_snpData(SEXP fileSEXP, SEXP sepSEXP, SEXP quoteSEXP, SEXP rowNamesSEXP, SEXP na_strSEXP, SEXP nrowsSEXP) {
11 | BEGIN_RCPP
12 | Rcpp::RObject rcpp_result_gen;
13 | Rcpp::RNGScope rcpp_rngScope_gen;
14 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type file(fileSEXP);
15 | Rcpp::traits::input_parameter< char >::type sep(sepSEXP);
16 | Rcpp::traits::input_parameter< char >::type quote(quoteSEXP);
17 | Rcpp::traits::input_parameter< bool >::type rowNames(rowNamesSEXP);
18 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type na_str(na_strSEXP);
19 | Rcpp::traits::input_parameter< int >::type nrows(nrowsSEXP);
20 | rcpp_result_gen = Rcpp::wrap(read_snpData(file, sep, quote, rowNames, na_str, nrows));
21 | return rcpp_result_gen;
22 | END_RCPP
23 | }
24 | // corDist
25 | double corDist(RawVector x, RawVector y);
26 | RcppExport SEXP _qtcat_corDist(SEXP xSEXP, SEXP ySEXP) {
27 | BEGIN_RCPP
28 | Rcpp::RObject rcpp_result_gen;
29 | Rcpp::RNGScope rcpp_rngScope_gen;
30 | Rcpp::traits::input_parameter< RawVector >::type x(xSEXP);
31 | Rcpp::traits::input_parameter< RawVector >::type y(ySEXP);
32 | rcpp_result_gen = Rcpp::wrap(corDist(x, y));
33 | return rcpp_result_gen;
34 | END_RCPP
35 | }
36 | // corDists
37 | NumericVector corDists(RawMatrix x);
38 | RcppExport SEXP _qtcat_corDists(SEXP xSEXP) {
39 | BEGIN_RCPP
40 | Rcpp::RObject rcpp_result_gen;
41 | Rcpp::RNGScope rcpp_rngScope_gen;
42 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
43 | rcpp_result_gen = Rcpp::wrap(corDists(x));
44 | return rcpp_result_gen;
45 | END_RCPP
46 | }
47 | // corPreIdenticals
48 | List corPreIdenticals(RawMatrix x, const int step);
49 | RcppExport SEXP _qtcat_corPreIdenticals(SEXP xSEXP, SEXP stepSEXP) {
50 | BEGIN_RCPP
51 | Rcpp::RObject rcpp_result_gen;
52 | Rcpp::RNGScope rcpp_rngScope_gen;
53 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
54 | Rcpp::traits::input_parameter< const int >::type step(stepSEXP);
55 | rcpp_result_gen = Rcpp::wrap(corPreIdenticals(x, step));
56 | return rcpp_result_gen;
57 | END_RCPP
58 | }
59 | // corIdenticals
60 | List corIdenticals(RawMatrix x, IntegerVector clustIdx);
61 | RcppExport SEXP _qtcat_corIdenticals(SEXP xSEXP, SEXP clustIdxSEXP) {
62 | BEGIN_RCPP
63 | Rcpp::RObject rcpp_result_gen;
64 | Rcpp::RNGScope rcpp_rngScope_gen;
65 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
66 | Rcpp::traits::input_parameter< IntegerVector >::type clustIdx(clustIdxSEXP);
67 | rcpp_result_gen = Rcpp::wrap(corIdenticals(x, clustIdx));
68 | return rcpp_result_gen;
69 | END_RCPP
70 | }
71 | // joinCorIdenticals
72 | List joinCorIdenticals(int n, List preclust, List ClustMedo);
73 | RcppExport SEXP _qtcat_joinCorIdenticals(SEXP nSEXP, SEXP preclustSEXP, SEXP ClustMedoSEXP) {
74 | BEGIN_RCPP
75 | Rcpp::RObject rcpp_result_gen;
76 | Rcpp::RNGScope rcpp_rngScope_gen;
77 | Rcpp::traits::input_parameter< int >::type n(nSEXP);
78 | Rcpp::traits::input_parameter< List >::type preclust(preclustSEXP);
79 | Rcpp::traits::input_parameter< List >::type ClustMedo(ClustMedoSEXP);
80 | rcpp_result_gen = Rcpp::wrap(joinCorIdenticals(n, preclust, ClustMedo));
81 | return rcpp_result_gen;
82 | END_RCPP
83 | }
84 | // corClarans
85 | List corClarans(RawMatrix x, const int k, const int maxNeigbours);
86 | RcppExport SEXP _qtcat_corClarans(SEXP xSEXP, SEXP kSEXP, SEXP maxNeigboursSEXP) {
87 | BEGIN_RCPP
88 | Rcpp::RObject rcpp_result_gen;
89 | Rcpp::RNGScope rcpp_rngScope_gen;
90 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
91 | Rcpp::traits::input_parameter< const int >::type k(kSEXP);
92 | Rcpp::traits::input_parameter< const int >::type maxNeigbours(maxNeigboursSEXP);
93 | rcpp_result_gen = Rcpp::wrap(corClarans(x, k, maxNeigbours));
94 | return rcpp_result_gen;
95 | END_RCPP
96 | }
97 | // corMedoids
98 | IntegerVector corMedoids(RawMatrix x, IntegerVector clusters);
99 | RcppExport SEXP _qtcat_corMedoids(SEXP xSEXP, SEXP clustersSEXP) {
100 | BEGIN_RCPP
101 | Rcpp::RObject rcpp_result_gen;
102 | Rcpp::RNGScope rcpp_rngScope_gen;
103 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
104 | Rcpp::traits::input_parameter< IntegerVector >::type clusters(clustersSEXP);
105 | rcpp_result_gen = Rcpp::wrap(corMedoids(x, clusters));
106 | return rcpp_result_gen;
107 | END_RCPP
108 | }
109 | // design
110 | NumericMatrix design(RawMatrix x);
111 | RcppExport SEXP _qtcat_design(SEXP xSEXP) {
112 | BEGIN_RCPP
113 | Rcpp::RObject rcpp_result_gen;
114 | Rcpp::RNGScope rcpp_rngScope_gen;
115 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
116 | rcpp_result_gen = Rcpp::wrap(design(x));
117 | return rcpp_result_gen;
118 | END_RCPP
119 | }
120 | // afreq
121 | NumericVector afreq(RawMatrix x, bool maf);
122 | RcppExport SEXP _qtcat_afreq(SEXP xSEXP, SEXP mafSEXP) {
123 | BEGIN_RCPP
124 | Rcpp::RObject rcpp_result_gen;
125 | Rcpp::RNGScope rcpp_rngScope_gen;
126 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
127 | Rcpp::traits::input_parameter< bool >::type maf(mafSEXP);
128 | rcpp_result_gen = Rcpp::wrap(afreq(x, maf));
129 | return rcpp_result_gen;
130 | END_RCPP
131 | }
132 | // hetfreq
133 | NumericVector hetfreq(RawMatrix x, int dim);
134 | RcppExport SEXP _qtcat_hetfreq(SEXP xSEXP, SEXP dimSEXP) {
135 | BEGIN_RCPP
136 | Rcpp::RObject rcpp_result_gen;
137 | Rcpp::RNGScope rcpp_rngScope_gen;
138 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
139 | Rcpp::traits::input_parameter< int >::type dim(dimSEXP);
140 | rcpp_result_gen = Rcpp::wrap(hetfreq(x, dim));
141 | return rcpp_result_gen;
142 | END_RCPP
143 | }
144 | // nafreq
145 | NumericVector nafreq(RawMatrix x, int dim);
146 | RcppExport SEXP _qtcat_nafreq(SEXP xSEXP, SEXP dimSEXP) {
147 | BEGIN_RCPP
148 | Rcpp::RObject rcpp_result_gen;
149 | Rcpp::RNGScope rcpp_rngScope_gen;
150 | Rcpp::traits::input_parameter< RawMatrix >::type x(xSEXP);
151 | Rcpp::traits::input_parameter< int >::type dim(dimSEXP);
152 | rcpp_result_gen = Rcpp::wrap(nafreq(x, dim));
153 | return rcpp_result_gen;
154 | END_RCPP
155 | }
156 |
157 | static const R_CallMethodDef CallEntries[] = {
158 | {"_qtcat_read_snpData", (DL_FUNC) &_qtcat_read_snpData, 6},
159 | {"_qtcat_corDist", (DL_FUNC) &_qtcat_corDist, 2},
160 | {"_qtcat_corDists", (DL_FUNC) &_qtcat_corDists, 1},
161 | {"_qtcat_corPreIdenticals", (DL_FUNC) &_qtcat_corPreIdenticals, 2},
162 | {"_qtcat_corIdenticals", (DL_FUNC) &_qtcat_corIdenticals, 2},
163 | {"_qtcat_joinCorIdenticals", (DL_FUNC) &_qtcat_joinCorIdenticals, 3},
164 | {"_qtcat_corClarans", (DL_FUNC) &_qtcat_corClarans, 3},
165 | {"_qtcat_corMedoids", (DL_FUNC) &_qtcat_corMedoids, 2},
166 | {"_qtcat_design", (DL_FUNC) &_qtcat_design, 1},
167 | {"_qtcat_afreq", (DL_FUNC) &_qtcat_afreq, 2},
168 | {"_qtcat_hetfreq", (DL_FUNC) &_qtcat_hetfreq, 2},
169 | {"_qtcat_nafreq", (DL_FUNC) &_qtcat_nafreq, 2},
170 | {NULL, NULL, 0}
171 | };
172 |
173 | RcppExport void R_init_qtcat(DllInfo *dll) {
174 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
175 | R_useDynamicSymbols(dll, FALSE);
176 | }
177 |
--------------------------------------------------------------------------------
/src/ReadData.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | using namespace std;
7 |
8 | void split(const string &s, char delim, vector &elems);
9 |
10 | // [[Rcpp::export]]
11 | Rcpp::List read_snpData(Rcpp::CharacterVector file, char sep, char quote,
12 | bool rowNames, Rcpp::CharacterVector na_str,
13 | int nrows) {
14 | string oneLine;
15 | vector lineElements;
16 | const string fname = Rcpp::as(file);
17 | const string naStr = Rcpp::as(na_str);
18 | // Input file stream
19 | ifstream fileIn(fname.c_str());
20 | // Column names (Individual names)
21 | getline(fileIn, oneLine);
22 | // Delete qoutes if exist
23 | if (quote != ' ') {
24 | oneLine.erase(remove(oneLine.begin(), oneLine.end(), quote), oneLine.end());
25 | }
26 | // check if sep is part of first line
27 | if (oneLine.find(sep) == string::npos) {
28 | Rcpp::stop("In the first line the separator character 'sep' doesn't exist");
29 | }
30 | // split string in to vector of strings
31 | split(oneLine, sep, lineElements);
32 | unsigned int row = lineElements.size();
33 | // Individual names
34 | Rcpp::CharacterVector indivNames = Rcpp::wrap(lineElements);
35 | //
36 | int nosnpcount = 0;
37 | vector lociNames;
38 | vector chr;
39 | vector pos;
40 | set allelesSet;
41 | vector allelepos;
42 | string AA;
43 | string AB;
44 | string BA;
45 | string BB;
46 | vector snpData;
47 | int col = 0;
48 | int posStart = 0;
49 | int dataStart = 2;
50 | if (rowNames) {
51 | dataStart ++;
52 | posStart ++;
53 | }
54 | int line = 0;
55 | while(getline(fileIn, oneLine)) {
56 | line ++;
57 | lineElements.clear();
58 | allelesSet.clear();
59 | // Delete qoutes if exist
60 | if (quote != ' ') {
61 | oneLine.erase(remove(oneLine.begin(), oneLine.end(), quote), oneLine.end());
62 | }
63 | // split string in to vector of strings
64 | split(oneLine, sep, lineElements);
65 | // length of line
66 | if (row != lineElements.size()){
67 | Rcpp::stop("Error: Length of line ", line, " is ",
68 | lineElements.size(), " instead of ", row);
69 | }
70 | // detect Nucleotides in this line (SNP)
71 | for (unsigned int i = dataStart; i < lineElements.size(); ++ i) {
72 | if (lineElements[i] != naStr) {
73 | allelesSet.insert(lineElements[i].begin(), lineElements[i].end());
74 | }
75 | }
76 | vector allele(allelesSet.begin(), allelesSet.end());
77 | // if more not two alleles skip
78 | if (allele.size() != 2) {
79 | nosnpcount ++;
80 | continue;
81 | }
82 | allelepos.push_back(allele[0]);
83 | allelepos.push_back(allele[1]);
84 | // if two alleles make new coding
85 | AA = allele[0]; AA += allele[0];
86 | AB = allele[0]; AB += allele[1];
87 | BA = allele[1]; BA += allele[0];
88 | BB = allele[1]; BB += allele[1];
89 | // raw coding of line (SNP)
90 | for (unsigned int i = dataStart; i < lineElements.size(); i ++) {
91 | if (lineElements[i] == AA) {
92 | snpData.push_back(0x01);
93 | } else if (lineElements[i] == BB) {
94 | snpData.push_back(0x03);
95 | } else if ((lineElements[i] == AB) |
96 | (lineElements[i] == BA)) {
97 | snpData.push_back(0x02);
98 | } else {
99 | snpData.push_back(0x00);
100 | }
101 | }
102 | // SNP names
103 | if (rowNames) {
104 | lociNames.push_back(lineElements[0]);
105 | }
106 | // genetic positions
107 | chr.push_back(lineElements[posStart]);
108 | pos.push_back(atoi(lineElements[posStart + 1].c_str()));
109 | col ++;
110 | if ((nrows > 0) && (nrows <= line)) {
111 | break;
112 | }
113 | }
114 | if (snpData.size() == 0)
115 | Rcpp::stop("No valid SNP found");
116 | if (nosnpcount > 0)
117 | Rcpp::Rcerr << "note: " << nosnpcount << " of " << line <<
118 | " lines have more or less than two alleles and are therefore not considered"
119 | << endl;
120 | // Rcpp conversioan and vector as matrix
121 | // alleles
122 | Rcpp::CharacterVector alleles = Rcpp::wrap(allelepos);
123 | alleles.attr("dim") = Rcpp::Dimension(2, col);
124 | // snpData
125 | Rcpp::RawVector snpOutData(snpData.size());
126 | copy(snpData.begin(), snpData.end(), snpOutData.begin());
127 | row = snpOutData.size() / col;
128 | snpOutData.attr("dim") = Rcpp::Dimension(row, col);
129 | // individual names
130 | indivNames.erase(indivNames.begin(), indivNames.begin() + dataStart);
131 | // Results as List
132 | Rcpp::List out = Rcpp::List::create(Rcpp::Named("snpData", snpOutData),
133 | Rcpp::Named("alleles", alleles),
134 | Rcpp::Named("chr", chr),
135 | Rcpp::Named("pos", pos),
136 | Rcpp::Named("lociNames", lociNames),
137 | Rcpp::Named("indivNames", indivNames));
138 | return out;
139 | }
140 |
141 | // split string in to vector of strings
142 | void split(const string &s, char delim, vector &elems) {
143 | stringstream ss(s);
144 | string item;
145 | while (getline(ss, item, delim)) {
146 | elems.push_back(item);
147 | }
148 | }
149 |
--------------------------------------------------------------------------------
/src/snpCluster.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | using namespace Rcpp;
4 |
5 | // correlation
6 | double cor(RawVector x, RawVector y) {
7 | int n = x.size();
8 | int n_na = 0;
9 | double ex = 0, ey = 0, xt = 0, yt = 0, sxx = 0, syy = 0, sxy = 0;
10 | // setup variables and find the mean
11 | for (int i = 0; i < n; i ++) {
12 | if ((x[i] == 0x00) | (y[i] == 0x00)) {
13 | n_na ++;
14 | continue;
15 | }
16 | ex += x[i];
17 | ey += y[i];
18 | }
19 | if ((n - n_na) < 1)
20 | stop("estimation of correlation not possible, no complete element pairs");
21 | ex /= (n - n_na);
22 | ey /= (n - n_na);
23 | // correlation coefficent
24 | for (int i = 0; i < n; i ++) {
25 | if ((x[i] == 0x00) | (y[i] == 0x00))
26 | continue;
27 | xt = x[i] - ex;
28 | yt = y[i] - ey;
29 | sxx += xt * xt;
30 | syy += yt * yt;
31 | sxy += xt * yt;
32 | }
33 | double cor = sxy / (sqrt(sxx * syy) + 1e-16);
34 | return cor;
35 | } // cor
36 |
37 | // 1 x 1 correlation distance
38 | // [[Rcpp::export]]
39 | double corDist(RawVector x, RawVector y) {
40 | double dcor = 1 - std::fabs(cor(x, y));
41 | return dcor;
42 | }
43 |
44 | // distance from all marker
45 | // [[Rcpp::export]]
46 | NumericVector corDists(RawMatrix x) {
47 | int n = x.ncol();
48 | NumericVector dist(n * (n -1) / 2);
49 | uint64_t count = 0;
50 | for (int i = 0; i < n - 1; i ++) {
51 | for (int j = i + 1; j < n; j ++) {
52 | dist[count] = corDist(x(_, i), x(_, j));
53 | count ++;
54 | }
55 | }
56 | return dist;
57 | } // corDists
58 |
59 | //pre cluster for identical search
60 | // [[Rcpp::export]]
61 | List corPreIdenticals(RawMatrix x, const int step) {
62 | int n = x.ncol();
63 | IntegerVector clusters(n);
64 | std::map< int, std::vector > preclust;
65 | std::vector premedoInx;
66 | int k = 0;
67 | if (n >= step * 2) {
68 | // medoids equally distributed over the search space
69 | bool smallDist = false;
70 | int startStep = (int)step * .5;
71 | double medodist = 0;
72 | premedoInx.push_back(startStep);
73 | for (int i = startStep + step; i < n; i += step) {
74 | for(int j = startStep; j < i; j += step) {
75 | medodist = corDist(x(_, i), x(_, j));
76 | if (medodist < .05) {
77 | smallDist = true;
78 | break;
79 | }
80 | }
81 | if (smallDist) {
82 | smallDist = false;
83 | continue;
84 | }
85 | premedoInx.push_back(i);
86 | }
87 | k = premedoInx.size();
88 | }
89 | int j0;
90 | if (k > 1) {
91 | // cluster all variables to the nearest medoid
92 | NumericVector predist(k);
93 | for (int i = 0; i < n; i ++) {
94 | for (int j = 0; j < k; j ++) {
95 | j0 = premedoInx[j];
96 | predist[j] = corDist(x(_, i), x(_, j0));
97 | }
98 | preclust[which_min(predist)].push_back(i);
99 | }
100 | } else {
101 | for (int i = 0; i < n; i ++) {
102 | preclust[0].push_back(i);
103 | }
104 | }
105 | return List::create(preclust);
106 | } // preClustIdenticals
107 |
108 | // find identicals
109 | // [[Rcpp::export]]
110 | List corIdenticals(RawMatrix x, IntegerVector clustIdx) {
111 | int nCluster = clustIdx.size();
112 | double dist = 0;
113 | IntegerVector clusters(nCluster);
114 | int clustCount = 1;
115 | std::vector grp, medoInx;
116 | int gMedo, i0, j0;
117 | for (int i = 0; i < nCluster; i ++) {
118 | i0 = clustIdx[i];
119 | if (clusters[i] == 0) {
120 | clusters[i] = clustCount;
121 | grp.push_back(i);
122 | for (int j = i + 1; j < nCluster; j ++) {
123 | j0 = clustIdx[j];
124 | if (clusters[j] == 0) {
125 | dist = corDist(x(_, i0), x(_, j0));
126 | if (dist <= 1e-7) {
127 | clusters[j] = clustCount;
128 | grp.push_back(j);
129 | }
130 | }
131 | }
132 | gMedo = grp.size() / 2;
133 | medoInx.push_back(grp[gMedo]);
134 | clustCount ++;
135 | grp.clear();
136 | }
137 | }
138 | return List::create(clusters, medoInx);
139 | } // identicals
140 |
141 | // join data to one object
142 | // [[Rcpp::export]]
143 | List joinCorIdenticals(int n, List preclust, List ClustMedo) {
144 | IntegerVector clusters(n);
145 | std::vector medoInx;
146 | List SubClustMedo;
147 | IntegerVector clustIdx, clust, medo;
148 | int count = 0, j0 = 0;
149 | for (int i = 0; i < preclust.size(); i ++) {
150 | clustIdx = preclust[i];
151 | SubClustMedo = ClustMedo[i];
152 | clust = SubClustMedo[0];
153 | medo = SubClustMedo[1];
154 | for (int j = 0; j < clustIdx.size(); j ++) {
155 | j0 = clustIdx[j];
156 | clusters[j0] = clust[j] + count;
157 | }
158 | for (int j = 0; j < medo.size(); j ++) {
159 | j0 = medo[j];
160 | medoInx.push_back(clustIdx[j0]);
161 | }
162 | count = medoInx.size();
163 | }
164 | return List::create(clusters, medoInx);
165 | } // joinIdenticals
166 |
167 |
168 | // clustering of IDB by correlation dictamce with CLARANS
169 | // [[Rcpp::export]]
170 | List corClarans(RawMatrix x, const int k, const int maxNeigbours) {
171 | RNGScope scope;
172 | Environment base("package:base");
173 | Function sample_int = base["sample.int"];
174 | int n = x.ncol();
175 | NumericMatrix medoDist(n, k);
176 | IntegerVector medoInx(k);
177 | double dist = 0;
178 | IntegerVector clusters(n);
179 | // starting medoid and clusters
180 | NumericVector pdist(n, 1.0);
181 | int m = 0;
182 | for (int c = 0; c < k; c ++) {
183 | // starting medoid
184 | m = as(sample_int(n, 1, false, pdist))-1;
185 | medoInx[c] = m;
186 | for (int i= 0; i < n; i ++) {
187 | dist = corDist(x(_, m), x(_, i));
188 | medoDist(i, c) = dist;
189 | }
190 | }
191 | int clust = 0;
192 | double costs = 0, objective = 0;
193 | for (int i= 0; i < n; i ++) {
194 | // starting cluster membership
195 | clust = which_min(medoDist(i, _));
196 | clusters[i] = clust;
197 | costs += medoDist(i, clust);
198 | }
199 | objective = costs / n;
200 | IntegerVector ranVar(maxNeigbours);
201 | int i = 0, o = 0;
202 | double minDist = 0;
203 | NumericVector objectDist(n);
204 | NumericVector objectMinDist(n);
205 | double object_cost = 0;
206 | // iteration of inner clarans loop
207 | int iter = 0;
208 | while (iter < maxNeigbours) {
209 | if ( iter == 0) {
210 | ranVar = floor(runif(maxNeigbours, 0, n - 1e-15));
211 | }
212 | // replacing medoids with objects if costs are smaller
213 | i = ranVar[iter];
214 | o = clusters[i];
215 | for (int j = 0; j < n; j ++) {
216 | dist = corDist(x(_, i), x(_, j));
217 | objectDist[j] = dist;
218 | minDist = dist;
219 | for (int c = 0; c < k; c ++) {
220 | if (c != o) {
221 | minDist = std::min(minDist, medoDist(j, c));
222 | }
223 | }
224 | objectMinDist[j] = minDist;
225 | }
226 | object_cost = sum(objectMinDist);
227 | if (object_cost < costs) {
228 | // new medoids
229 | medoDist(_, o) = objectDist;
230 | medoInx[o] = i;
231 | // new costs
232 | costs = object_cost;
233 | objective = costs / n;
234 | // new membership to cluster
235 | for (int j = 0; j < n; j ++) {
236 | clusters[j] = which_min(medoDist(j, _));
237 | }
238 | // restarting loop
239 | iter = 0;
240 | } else {
241 | iter ++;
242 | }
243 | }
244 | // Result as list
245 | return List::create(clusters + 1, medoInx, objective);
246 | } // clarans
247 |
248 |
249 | // medoids of SNP clusters
250 | // [[Rcpp::export]]
251 | IntegerVector corMedoids(RawMatrix x, IntegerVector clusters) {
252 | int n = clusters.size();
253 | // map of vectors with cluster indexes
254 | std::map< int, std::vector > indexMap;
255 | for (int i = 0; i < n; i ++) {
256 | indexMap[clusters[i]].push_back(i);
257 | }
258 | // for each cluster a dist matrix were the col with min dist is the medoid
259 | std::vector clustInx;
260 | int nClust = 0;
261 | double dist = 0;
262 | int dist_j, dist_i;
263 | std::vector medoInx;
264 | for (std::map< int, std::vector >::iterator it = indexMap.begin();
265 | it != indexMap.end(); it ++) {
266 | clustInx = it->second;
267 | nClust = clustInx.size();
268 | NumericMatrix dist_mat(nClust, nClust);
269 | NumericVector dist_sums(nClust);
270 | for (int j = 0; j < nClust - 1; j ++) {
271 | dist_j = clustInx[j];
272 | for (int i = j; i < nClust; i ++) {
273 | if ( i == j) {
274 | dist_mat(j, i) = 0;
275 | } else {
276 | dist_i = clustInx[i];
277 | dist = corDist(x(_, dist_j), x(_, dist_i));
278 | dist_mat(j, i) = dist;
279 | dist_mat(i, j) = dist;
280 | }
281 | }
282 | }
283 | for (int i = 0; i < nClust; i ++) {
284 | dist_sums[i] = sum(dist_mat(_, i));
285 | }
286 | medoInx.push_back(clustInx[which_min(dist_sums)] + 1);
287 | }
288 | return wrap(medoInx);
289 | }
290 |
--------------------------------------------------------------------------------
/src/snpData.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | using namespace Rcpp;
6 |
7 |
8 | // [[Rcpp::export]]
9 | NumericMatrix design(RawMatrix x) {
10 | size_t n = x.nrow();
11 | size_t p = x.ncol();
12 | NumericMatrix out(n, p);
13 | for (size_t i = 0; i < p; i ++) {
14 | for (size_t j = 0; j < n; j ++) {
15 | if (x(j, i) == 0x01) {
16 | out(j, i) = 0;
17 | } else if (x(j, i) == 0x03) {
18 | out(j, i) = 1;
19 | } else if (x(j, i) == 0x02) {
20 | out(j, i) = .5;
21 | }
22 | }
23 | }
24 | return out;
25 | }
26 |
27 |
28 | // [[Rcpp::export]]
29 | NumericVector afreq(RawMatrix x, bool maf) {
30 | size_t p = x.ncol();
31 | size_t n = x.nrow();
32 | size_t n_na = 0;
33 | double a = 0;
34 | double af = 0;
35 | NumericVector allfreq(p);
36 | for (size_t i = 0; i < p; i ++) {
37 | for (size_t j = 0; j < n; j ++) {
38 | if (x(j, i) == 0x00) {
39 | n_na ++;
40 | } else if (x(j, i) == 0x01) {
41 | a ++;
42 | } else if (x(j, i) == 0x02) {
43 | a += .5;
44 | }
45 | }
46 | af = a / (n - n_na);
47 | if (maf) {
48 | if (af <= .5) {
49 | allfreq[i] = af;
50 | } else {
51 | allfreq[i] = 1 - af;
52 | }
53 | } else {
54 | allfreq[i] = af;
55 | }
56 | n_na = 0;
57 | a = 0;
58 | }
59 | return allfreq;
60 | }
61 |
62 |
63 | // [[Rcpp::export]]
64 | NumericVector hetfreq(RawMatrix x, int dim) {
65 | size_t k, l;
66 | if (dim == 1) {
67 | k = x.nrow();
68 | l = x.ncol();
69 | } else {
70 | k = x.ncol();
71 | l = x.nrow();
72 | }
73 | size_t l_na = 0;
74 | double het = 0;
75 | NumericVector hetf(k);
76 | for (size_t i = 0; i < k; i ++) {
77 | for (size_t j = 0; j < l; j ++) {
78 | if (dim == 1) {
79 | if (x(i, j) == 0x00) {
80 | l_na ++;
81 | } else if (x(i, j) == 0x02) {
82 | het ++;
83 | }
84 | } else {
85 | if (x(j, i) == 0x00) {
86 | l_na ++;
87 | } else if (x(j, i) == 0x02) {
88 | het ++;
89 | }
90 | }
91 | }
92 | hetf[i] = het / (l - l_na);
93 | het = 0;
94 | l_na = 0;
95 | }
96 | return hetf;
97 | }
98 |
99 |
100 | // [[Rcpp::export]]
101 | NumericVector nafreq(RawMatrix x, int dim) {
102 | size_t k, l;
103 | if (dim == 1) {
104 | k = x.nrow();
105 | l = x.ncol();
106 | } else {
107 | k = x.ncol();
108 | l = x.nrow();
109 | }
110 | double na = 0;
111 | NumericVector naf(k);
112 | for (size_t i = 0; i < k; i ++) {
113 | for (size_t j = 0; j < l; j ++) {
114 | if (dim == 1) {
115 | if (x(i, j) == 0x00)
116 | na ++;
117 | } else {
118 | if (x(j, i) == 0x00)
119 | na ++;
120 | }
121 | }
122 | naf[i] = na / l;
123 | na = 0;
124 | }
125 | return naf;
126 | }
127 |
--------------------------------------------------------------------------------