├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
    ├── BGData.R
    ├── FWD.R
    ├── GWAS.R
    ├── chunkedApply.R
    ├── findRelated.R
    ├── getG.R
    ├── preprocess.R
    ├── segments.R
    ├── summarize.R
    └── utils.R
├── README.md
├── inst
    ├── CITATION
    └── extdata
    │   ├── chr1.bed
    │   ├── chr1.bim
    │   ├── chr1.fam
    │   ├── chr1.raw
    │   ├── chr2.bed
    │   ├── chr2.bim
    │   ├── chr2.fam
    │   ├── chr2.raw
    │   ├── chr3.bed
    │   ├── chr3.bim
    │   ├── chr3.fam
    │   ├── chr3.raw
    │   └── pheno.txt
├── man
    ├── BGData-class.Rd
    ├── BGData-package.Rd
    ├── BGData.Rd
    ├── FWD.Rd
    ├── GWAS.Rd
    ├── as.BGData.Rd
    ├── chunkedApply.Rd
    ├── chunkedMap.Rd
    ├── file-backed-matrices.Rd
    ├── findRelated.Rd
    ├── geno-class.Rd
    ├── geno.Rd
    ├── getG.Rd
    ├── getG_symDMatrix.Rd
    ├── load.BGData.Rd
    ├── multi-level-parallelism.Rd
    ├── orderedMerge.Rd
    ├── preprocess.Rd
    ├── readRAW.Rd
    ├── segments.Rd
    └── summarize.Rd
├── src
    ├── .gitignore
    ├── Makevars
    ├── fitLSYS.c
    ├── fitLSYS.h
    ├── init.c
    ├── preprocess.c
    ├── preprocess.h
    ├── rayOLS.c
    ├── rayOLS.h
    ├── summarize.c
    └── summarize.h
└── tests
    ├── testthat.R
    └── testthat
        ├── helper-utils.R
        ├── test-BGData.R
        ├── test-GWAS.R
        ├── test-chunkedApply.R
        ├── test-getG.R
        ├── test-preprocess-int.R
        ├── test-preprocess-real.R
        ├── test-summarize.R
        └── test-utils.R


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: BGData
 2 | Version: 2.4.1
 3 | License: MIT + file LICENSE
 4 | Title: A Suite of Packages for Analysis of Big Genomic Data
 5 | Description: An umbrella package providing a phenotype/genotype data structure
 6 |     and scalable and efficient computational methods for large genomic datasets
 7 |     in combination with several other packages: 'BEDMatrix', 'LinkedMatrix',
 8 |     and 'symDMatrix'.
 9 | Authors@R: c(
10 |     person("Gustavo", "de los Campos", email = "gustavoc@msu.edu", role = c("aut")),
11 |     person("Alexander", "Grueneberg", email = "cran@agrueneberg.info", role = c("aut", "cre")),
12 |     person("Paulino", "Perez", email = "perpdgo@gmail.com", role = c("ctb")),
13 |     person("Ana", "Vazquez", email = "avazquez@epi.msu.edu", role = c("ctb")))
14 | URL: https://github.com/QuantGen/BGData
15 | BugReports: https://github.com/QuantGen/BGData/issues
16 | Depends:
17 |     R (>= 3.0.2),
18 |     BEDMatrix (>= 1.4.0),
19 |     LinkedMatrix (>= 1.3.0),
20 |     symDMatrix (>= 2.0.0)
21 | Imports:
22 |     methods,
23 |     parallel,
24 |     crochet (>= 2.1.0),
25 |     bigmemory,
26 |     synchronicity,
27 |     ff,
28 |     bit
29 | Suggests:
30 |     data.table (>= 1.9.6),
31 |     lme4,
32 |     SKAT,
33 |     testthat
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2014-2015
2 | COPYRIGHT HOLDER: Gustavo de los Campos


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | S3method(as.BGData, BEDMatrix)
 2 | S3method(as.BGData, ColumnLinkedMatrix)
 3 | S3method(as.BGData, RowLinkedMatrix)
 4 | S3method(findRelated, matrix)
 5 | S3method(findRelated, symDMatrix)
 6 | export(BGData)
 7 | export(GWAS)
 8 | export(as.BGData)
 9 | export(chunkedApply)
10 | export(chunkedMap)
11 | export(findRelated)
12 | export(FWD)
13 | export(geno)
14 | export("geno<-")
15 | export(getG)
16 | export(getG_symDMatrix)
17 | export(load.BGData)
18 | export(map)
19 | export("map<-")
20 | export(orderedMerge)
21 | export(pheno)
22 | export("pheno<-")
23 | export(preprocess)
24 | export(readRAW)
25 | export(readRAW_big.matrix)
26 | export(readRAW_matrix)
27 | export(segments)
28 | export(summarize)
29 | exportClasses(BGData)
30 | exportMethods(geno)
31 | exportMethods("geno<-")
32 | exportMethods(pheno)
33 | exportMethods("pheno<-")
34 | exportMethods(map)
35 | exportMethods("map<-")
36 | importFrom(BEDMatrix, BEDMatrix)
37 | importFrom(LinkedMatrix, ColumnLinkedMatrix, LinkedMatrix, nNodes)
38 | importFrom(symDMatrix, nBlocks, symDMatrix)
39 | import(methods)
40 | importFrom(parallel, mclapply)
41 | importFrom(crochet, convertIndex)
42 | importFrom(bigmemory, attach.big.matrix, big.matrix, filebacked.big.matrix)
43 | importFrom(synchronicity, boost.mutex, lock, unlock)
44 | importFrom(ff, as.ff, ff, vt)
45 | importFrom(bit, physical, "physical<-")
46 | importFrom(stats, coef, lsfit, ls.print, model.frame, model.matrix, na.pass,
47 |                   pnorm, sd, terms, update)
48 | importFrom(utils, read.table, type.convert)
49 | importClassesFrom(BEDMatrix, BEDMatrix)
50 | importClassesFrom(LinkedMatrix, LinkedMatrix)
51 | importClassesFrom(bigmemory, big.matrix)
52 | importClassesFrom(symDMatrix, symDMatrix)
53 | useDynLib(BGData, .registration = TRUE, .fixes = "C_")
54 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
  1 | # BGData 2.4.1
  2 | 
  3 | - Fixed minor native routine registration issue.
  4 | - Drop defunct CI services.
  5 | - Update link to paper.
  6 | 
  7 | 
  8 | # BGData 2.4.0
  9 | 
 10 | - Add `FWD()` function for performing forward regressions.
 11 | 
 12 | 
 13 | # BGData 2.3.0
 14 | 
 15 | - Add `segments()` function for finding non-overlapping segments based on a
 16 |   summary statistic.
 17 | - `preprocess()`: Add `nCores` parameter.
 18 | - `findRelated()`: Map indices to sample names for matrix inputs.
 19 | - `getG()`: Add `impute` parameter.
 20 | - `getG()`: Impute by mean instead of 0 if `center = FALSE`.
 21 | - `getG()`: Use `preprocess()` internally for better performance.
 22 | - `as.BGData()`: Read genetic distances in .bim file as double instead of
 23 |   integer.
 24 | 
 25 | 
 26 | # BGData 2.2.0
 27 | 
 28 | - Follow [Bioconductor S4 practices][1].
 29 |   - If you have used `new()` to create `BGData` instances, please use the
 30 |     `BGData()` constructor function instead.
 31 |   - If you have used `@` to access the slots of `BGData` instances, please use
 32 |     the `geno()`, `pheno()`, and `map()` accessors instead.
 33 | - `BGData()`:
 34 |   - Do not create dimnames for `geno` as this object is likely shared.
 35 |   - Check if `geno` has row names before creating `pheno` stub.
 36 |   - Check if `geno` has column names before creating `map` stub.
 37 |   - Rename `IID` in `pheno` stub to `sample_id`.
 38 |   - Rename `mrk` in `map` stub to `variant_id`.
 39 |   - Change format of rownames for `pheno` stub to a sequence starting with
 40 |     `sample_` and rownames for `map` stub to a sequence starting with
 41 |     `variant_` if `geno` does not have dimnames.
 42 | - `as.BGData()`:
 43 |   - Force column classes when loading .fam and .bim files.
 44 |   - Force `FID` and `IID` columns to be of type `character` when loading
 45 |     alternate phenotype files.
 46 |   - Do not make assumptions about the structure of dimnames of a BEDMatrix
 47 |     object if it is passed without .fam and .bim file unless they are `NULL`.
 48 | - Add validity tests for `BGData` objects:
 49 |   - Check if number of rows of `geno` matches number of rows of `pheno`.
 50 |   - Check if number of columns of `geno` matches number of rows of `map`.
 51 |   - Warn if the row names of `pheno` do not match the row names of `geno`.
 52 |   - Warn if the row names of `map` do not match the column names of `geno`.
 53 | - Add `preprocess()` function for fast centering, scaling, and imputation.
 54 | - `GWAS()`: Return number of records used for each variant and allele
 55 |   frequencies in `rayOLS`.
 56 | - Update citation instructions.
 57 | - Use `inherits(., *)` instead of `class(.) == *` (R4 compat).
 58 | 
 59 | 
 60 | # BGData 2.1.0
 61 | 
 62 | - Add `chunkedMap()` function.
 63 | - Improve error handling in `chunkedMap()` and `chunkedApply()`.
 64 | - `summarize()`: Improve performance.
 65 | - `GWAS()`: Improve performance of `rayOLS` method.
 66 | - `GWAS()`: Fix bug when computing p-values for methods other than rayOLS,
 67 |   lsfit, or SKAT when `i` is used to subset samples.
 68 | - `GWAS()`: Fix wrong results in `lsfit` method when covariates with missing
 69 |   values are used.
 70 | - `as.BGData()`: Fix bug loading .fam and .bim files when path contains the
 71 |   word `bed`.
 72 | 
 73 | 
 74 | # BGData 2.0.0
 75 | 
 76 | ## Breaking Changes
 77 | 
 78 | - Rename `bufferSize` to `chunkSize`.
 79 | - Remove `nTasks` parameter from `chunkedApply()` and methods based on it.
 80 | - Remove `crossprods` function.
 81 | 
 82 | ## Other Changes
 83 | 
 84 | - Change chunking strategy to improve parallelism: instead of loading a subset
 85 |   of `chunkSize` in the main process, load a subset of `chunkSize` in each
 86 |   fork. That way `nTasks` is not necessary anymore and the same code can be
 87 |   used for one core and multiple cores.
 88 | - Add `findRelated()` function for use with matrices and symDMatrix objects.
 89 | - Add `orderedMerge()` function that allows for phenotypes to be easily merged
 90 |   into a BGData object.
 91 | - Performance improvements in `getG()` function: use single shared memory
 92 |   matrix to collect results.
 93 | - Performance improvements in `rayOLS` method in `GWAS()` function.
 94 | - `getG_symDMatrix()`: Support version 2 of symDMatrix package.
 95 | - `getG_symDMatrix()`: Add `chunkSize` parameter.
 96 | - `getG_symDMatrix()`: Add `minVar` parameter.
 97 | - `as.BGData()`: Use rownames of BEDMatrix object as rownames for pheno, and
 98 |   colnames of BEDMatrix object as rownames for map.
 99 | - Include process ID in verbose output if `nCores` > 1.
100 | 
101 | ## Bug Fixes
102 | 
103 | - `getG_symDMatrix()`: Fix scaling error when `scale = FALSE`.
104 | - `getG_symDMatrix()`: Compute block indices correctly for out-of-order,
105 |   non-sequential indices.
106 | - `getG_symDMatrix()`: Do not include centers and scales in attributes anymore
107 |   because the influence of `j` and `minVar` is difficult to retain.
108 | 
109 | 
110 | # BGData 1.0.0
111 | 
112 | Initial release.
113 | 
114 | [1]: https://bioconductor.org/help/course-materials/2017/Zurich/S4-classes-and-methods.html
115 | 


--------------------------------------------------------------------------------
/R/FWD.R:
--------------------------------------------------------------------------------
  1 | FWD <- function(y, X, df = 20, tol = 1e-7, maxIter = 1000, centerImpute = TRUE, verbose = TRUE) {
  2 |     y <- y - mean(y)
  3 |     if (centerImpute) {
  4 |         X <- BGData::preprocess(X, center = TRUE, impute = TRUE)
  5 |     }
  6 |     if (is.null(colnames(X))) {
  7 |         colNames <- paste0("X", 1:ncol(X))
  8 |     } else {
  9 |         colNames <- colnames(X)
 10 |     }
 11 |     X <- cbind(1, X)
 12 |     df <- df + 1
 13 |     colNames <- c("Int", colNames)
 14 |     C <- crossprod(X)
 15 |     rhs <- crossprod(X, y)
 16 |     n <- length(y)
 17 |     p <- ncol(X)
 18 |     active <- rep(FALSE, p)
 19 |     names(active) <- colNames
 20 |     B <- matrix(data = 0, nrow = p, ncol = df)
 21 |     rownames(B) <- colNames
 22 |     RSS <- rep(NA_real_, df)
 23 |     DF <- rep(NA_real_, df)
 24 |     VARE <- rep(NA_real_, df)
 25 |     LogLik <- rep(NA_real_, df)
 26 |     AIC <- rep(NA_real_, df)
 27 |     BIC <- rep(NA_real_, df)
 28 |     path <- rep(NA_character_, df)
 29 |     active[1] <- TRUE
 30 |     B[1, 1] <- mean(y)
 31 |     RSS[1] <- sum((y - B[1, 1])^2)
 32 |     DF[1] <- 1
 33 |     VARE[1] <- RSS[1] / (n - DF[1])
 34 |     LogLik[1] <- -(n / 2) * log(2 * pi * VARE[1]) - RSS[1] / (2 * VARE[1])
 35 |     AIC[1] <- -2 * LogLik[1] + 2 * DF[1]
 36 |     BIC[1] <- -2 * LogLik[1] + log(n) * (DF[1] + 1)
 37 |     path[1] <- colNames[1]
 38 |     tol <- tol * RSS[1]
 39 |     for (i in 2:df) {
 40 |         tmp <- addOne(
 41 |             C = C,
 42 |             rhs = rhs,
 43 |             active = active,
 44 |             b = B[, i - 1],
 45 |             RSS = RSS[i - 1],
 46 |             maxIter = maxIter,
 47 |             tol = tol
 48 |         )
 49 |         B[, i] <- tmp[["b"]]
 50 |         if (length(tmp[["newPred"]]) > 0) {
 51 |             active[tmp[["newPred"]]] <- TRUE
 52 |             path[i] <- colNames[tmp[["newPred"]]]
 53 |         } else {
 54 |             path[i] <- NA
 55 |         }
 56 |         RSS[i] <- tmp[["RSS"]]
 57 |         DF[i] <- sum(active)
 58 |         VARE[i] <- RSS[i] / (n - DF[i])
 59 |         LogLik[i] <- -(n / 2) * log(2 * pi * VARE[i]) - RSS[i] / VARE[i] / 2
 60 |         AIC[i] <- -2 * LogLik[i] + 2 * (DF[i] + 1)
 61 |         BIC[i] <- -2 * LogLik[i] + log(n) * (DF[i] + 1)
 62 |         if (verbose) {
 63 |             message("  ", DF[i] - 1, " predictors, AIC=", round(AIC[i], 2))
 64 |         }
 65 |     }
 66 |     OUT <- list(
 67 |         B = B,
 68 |         path = data.frame(
 69 |             variable = path,
 70 |             RSS = RSS,
 71 |             LogLik = LogLik,
 72 |             VARE = VARE,
 73 |             DF = DF,
 74 |             AIC = AIC,
 75 |             BIC = BIC
 76 |         )
 77 |     )
 78 |     return(OUT)
 79 | }
 80 | 
 81 | addOne <- function(C, rhs, active, b, RSS, maxIter, tol) {
 82 |     activeSet <- which(active)
 83 |     inactiveSet <- which(!active)
 84 |     nActive <- length(activeSet)
 85 |     nInactive <- length(inactiveSet)
 86 |     # if model is not null
 87 |     if (nActive > 1) {
 88 |         RSSNew <- rep(NA_real_, nInactive)
 89 |         for (i in 1:nInactive) {
 90 |             fm <- fitSYS(
 91 |                 C = C,
 92 |                 rhs = rhs,
 93 |                 b = b,
 94 |                 active = c(inactiveSet[i], activeSet),
 95 |                 RSS = RSS,
 96 |                 maxIter = maxIter,
 97 |                 tol = tol
 98 |             )
 99 |             RSSNew[i] <- fm[["RSS"]]
100 |         }
101 |         k <- which.min(RSSNew)
102 |         fm <- fitSYS(
103 |             C = C,
104 |             rhs = rhs,
105 |             b = b,
106 |             active = c(inactiveSet[k], activeSet),
107 |             RSS = RSS,
108 |             maxIter = maxIter,
109 |             tol = tol
110 |         )
111 |         ans <- list(b = fm[["b"]], newPred = inactiveSet[k], RSS = fm[["RSS"]])
112 |     # if model is null
113 |     } else {
114 |         bOLS <- rhs / diag(C)
115 |         dRSS <- diag(C) * bOLS^2
116 |         k <- which.max(dRSS)
117 |         b[k] <- bOLS[k]
118 |         RSS <- RSS - bOLS[k]^2 * C[k, k]
119 |         ans <- list(b = b, newPred = k, RSS = RSS)
120 |     }
121 |     return(ans)
122 | }
123 | 
# Thin wrapper around the native C_fitLSYS routine.
#
# active holds 1-based indices on the R side and is shifted by one before the
# call because the native routine expects 0-based indices. The first two
# elements of the native result are returned as a list with elements b and
# RSS.
fitSYS <- function(C, rhs, b, active, RSS, maxIter, tol) {
    zeroBased <- active - 1L
    fit <- .Call(C_fitLSYS, C, rhs, b, zeroBased, RSS, maxIter, tol)
    list(b = fit[[1]], RSS = fit[[2]])
}
129 | 


--------------------------------------------------------------------------------
/R/GWAS.R:
--------------------------------------------------------------------------------
  1 | GWAS <- function(formula, data, method = "lsfit", i = seq_len(nrow(geno(data))), j = seq_len(ncol(geno(data))), chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE, ...) {
  2 | 
  3 |     if (!inherits(data, "BGData")) {
  4 |         stop("data must BGData")
  5 |     }
  6 | 
  7 |     if (!method %in% c("rayOLS", "lsfit", "lm", "lm.fit", "glm", "lmer", "SKAT")) {
  8 |         stop("Only rayOLS, lsfit, lm, lm.fit, glm, lmer, and SKAT have been implemented so far.")
  9 |     }
 10 | 
 11 |     i <- convertIndex(geno(data), i, "i")
 12 |     j <- convertIndex(geno(data), j, "j")
 13 | 
 14 |     if (method == "rayOLS") {
 15 |         if (length(labels(terms(formula))) > 0L) {
 16 |             stop("method rayOLS can only be used with y~1 formula, if you want to add covariates pre-adjust your phenotype.")
 17 |         }
 18 |         OUT <- GWAS.rayOLS(formula = formula, data = data, i = i, j = j, chunkSize = chunkSize, nCores = nCores, verbose = verbose, ...)
 19 |     } else if (method == "lsfit") {
 20 |         OUT <- GWAS.lsfit(formula = formula, data = data, i = i, j = j, chunkSize = chunkSize, nCores = nCores, verbose = verbose, ...)
 21 |     } else if (method == "SKAT") {
 22 |         if (!requireNamespace("SKAT", quietly = TRUE)) {
 23 |             stop("SKAT needed for this function to work. Please install it.", call. = FALSE)
 24 |         }
 25 |         OUT <- GWAS.SKAT(formula = formula, data = data, i = i, j = j, verbose = verbose, ...)
 26 |     } else {
 27 |         if (method == "lmer") {
 28 |             if (!requireNamespace("lme4", quietly = TRUE)) {
 29 |                 stop("lme4 needed for this function to work. Please install it.", call. = FALSE)
 30 |             }
 31 |             FUN <- lme4::lmer
 32 |         } else {
 33 |             FUN <- match.fun(method)
 34 |         }
 35 |         GWAS.model <- update(formula, ".~z+.")
 36 |         OUT <- chunkedApply(X = geno(data), MARGIN = 2L, FUN = function(col, ...) {
 37 |             df <- pheno(data)[i, , drop = FALSE]
 38 |             df[["z"]] <- col
 39 |             fm <- FUN(GWAS.model, data = df, ...)
 40 |             getCoefficients(fm)
 41 |         }, i = i, j = j, chunkSize = chunkSize, nCores = nCores, verbose = verbose, ...)
 42 |         OUT <- t(OUT)
 43 |         rownames(OUT) <- colnames(geno(data))[j]
 44 |     }
 45 | 
 46 |     return(OUT)
 47 | }
 48 | 
 49 | GWAS.rayOLS <- function(formula, data, i = seq_len(nrow(geno(data))), j = seq_len(ncol(geno(data))), chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE, ...) {
 50 |     y <- pheno(data)[i, getResponse(formula)]
 51 |     y <- as.numeric(y)
 52 |     res <- chunkedMap(X = geno(data), FUN = rayOLS, i = i, j = j, chunkSize = chunkSize, nCores = nCores, verbose = verbose, y = y, ...)
 53 |     res <- do.call(rbind, res)
 54 |     colnames(res) <- c("Estimate", "Std.Err", "t-value", "Pr(>|t|)", "n", "allele_freq")
 55 |     rownames(res) <- colnames(geno(data))[j]
 56 |     return(res)
 57 | }
 58 | 
 59 | GWAS.lsfit <- function(formula, data, i = seq_len(nrow(geno(data))), j = seq_len(ncol(geno(data))), chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE, ...) {
 60 | 
 61 |     # The subset argument of model.frame is evaluated in the environment of the
 62 |     # formula, therefore subset after building the frame.
 63 |     frame <- model.frame(formula = formula, data = pheno(data), na.action = na.pass)[i, , drop = FALSE]
 64 |     model <- model.matrix(formula, frame)
 65 | 
 66 |     y <- pheno(data)[i, getResponse(formula)]
 67 | 
 68 |     res <- chunkedApply(X = geno(data), MARGIN = 2L, FUN = function(col, ...) {
 69 |         fm <- lsfit(x = cbind(col, model), y = y, intercept = FALSE)
 70 |         ls.print(fm, print.it = FALSE)[["coef.table"]][[1L]][1L, ]
 71 |     }, i = i, j = j, chunkSize = chunkSize, nCores = nCores, verbose = verbose, ...)
 72 |     res <- t(res)
 73 |     rownames(res) <- colnames(geno(data))[j]
 74 | 
 75 |     return(res)
 76 | }
 77 | 
 78 | # formula: the formula for the GWAS model without including the markers, e.g.
 79 | # y~1 or y~factor(sex)+age
 80 | # all the variables in the formula must be in data@pheno (BGData)
 81 | # containing slots @pheno and @geno
 82 | # groups: a vector mapping markers into groups (can be integer, character or
 83 | # factor)
 84 | GWAS.SKAT <- function(formula, data, groups, i = seq_len(nrow(geno(data))), j = seq_len(ncol(geno(data))), verbose = FALSE, ...) {
 85 | 
 86 |     uniqueGroups <- unique(groups)
 87 | 
 88 |     OUT <- matrix(data = double(), nrow = length(uniqueGroups), ncol = 2L)
 89 |     colnames(OUT) <- c("nMrk", "p-value")
 90 |     rownames(OUT) <- uniqueGroups
 91 | 
 92 |     H0 <- SKAT::SKAT_Null_Model(formula, data = pheno(data)[i, , drop = FALSE], ...)
 93 | 
 94 |     for (group in seq_along(uniqueGroups)) {
 95 |         Z <- geno(data)[i, groups == uniqueGroups[group], drop = FALSE]
 96 |         fm <- SKAT::SKAT(Z = Z, obj = H0, ...)
 97 |         OUT[group, ] <- c(ncol(Z), fm[["p.value"]])
 98 |         if (verbose) {
 99 |             message("Group ", group, " of ", length(uniqueGroups), " ...")
100 |         }
101 |     }
102 | 
103 |     return(OUT)
104 | }
105 | 
# Thin wrapper that hands x and y to the native C_rayOLS routine and returns
# its result unchanged.
rayOLS <- function(x, y) {
    fit <- .Call(C_rayOLS, x, y)
    return(fit)
}
109 | 
# S3 generic: extracts the coefficient row of the variant from a fitted model.
# Dispatches on the class of x.
getCoefficients <- function(x) {
    UseMethod("getCoefficients", x)
}
113 | 
# Returns the second row of the coefficient table of the model summary, i.e.
# the row after the intercept.
getCoefficients.lm <- function(x) {
    coefTable <- coef(summary(x))
    return(coefTable[2L, ])
}
117 | 
# Returns the second row of the coefficient table of the model summary, i.e.
# the row after the intercept.
getCoefficients.glm <- function(x) {
    coefTable <- coef(summary(x))
    return(coefTable[2L, ])
}
121 | 
# Returns the second row of the coefficient table of the model summary (the
# row after the intercept) with a p-value appended.
#
# The p-value is a two-sided normal approximation based on the third entry of
# the row (the t value), so that negative effects are treated the same way as
# positive ones; the previous one-sided value 1 - pnorm(t) approached 1 for
# negative effects. The appended element keeps the name of the t value entry.
getCoefficients.lmerMod <- function(x) {
    ans <- coef(summary(x))[2L, ]
    pValue <- 2 * pnorm(abs(ans[3L]), lower.tail = FALSE)
    c(ans, pValue)
}
127 | 
# Returns the left-hand side of a formula as character.
#
# A two-sided formula is a call of length three (`~`, left-hand side,
# right-hand side); see
# https://cran.r-project.org/doc/manuals/r-release/R-lang.html#Language-objects.
# For a one-sided formula the second element is the right-hand side, so it is
# rejected instead of being returned as if it were the response.
getResponse <- function(formula) {
    if (length(formula) != 3L) {
        stop("formula must have a response on the left-hand side")
    }
    # Extract the component from the parse tree and convert it to character
    as.character(formula[[2L]])
}
134 | 


--------------------------------------------------------------------------------
/R/chunkedApply.R:
--------------------------------------------------------------------------------
  1 | chunkedMap <- function(X, FUN, i = seq_len(nrow(X)), j = seq_len(ncol(X)), chunkBy = 2L, chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE, ...) {
  2 |     if (length(dim(X)) != 2L) {
  3 |         stop("X must be a matrix-like object")
  4 |     }
  5 |     i <- convertIndex(X, i, "i")
  6 |     j <- convertIndex(X, j, "j")
  7 |     dim <- c(length(i), length(j))
  8 |     if (is.null(chunkSize)) {
  9 |         chunkSize <- dim[chunkBy]
 10 |         nChunks <- 1L
 11 |     } else {
 12 |         nChunks <- ceiling(dim[chunkBy] / chunkSize)
 13 |     }
 14 |     chunkApply <- function(curChunk, ...) {
 15 |         if (verbose) {
 16 |             if (nCores > 1) {
 17 |                 message("Process ", Sys.getpid(), ": Chunk ", curChunk, " of ", nChunks, " ...")
 18 |             } else {
 19 |                 message("Chunk ", curChunk, " of ", nChunks, " ...")
 20 |             }
 21 |         }
 22 |         range <- seq(
 23 |             ((curChunk - 1L) * chunkSize) + 1L,
 24 |             min(curChunk * chunkSize, dim[chunkBy])
 25 |         )
 26 |         if (chunkBy == 2L) {
 27 |             chunk <- X[i, j[range], drop = FALSE]
 28 |         } else {
 29 |             chunk <- X[i[range], j, drop = FALSE]
 30 |         }
 31 |         FUN(chunk, ...)
 32 |     }
 33 |     if (nCores == 1L) {
 34 |         res <- lapply(X = seq_len(nChunks), FUN = chunkApply, ...)
 35 |     } else {
 36 |         # Suppress warnings because of custom error handling
 37 |         res <- suppressWarnings(mclapply(X = seq_len(nChunks), FUN = chunkApply, ..., mc.cores = nCores))
 38 |         errors <- which(vapply(res, inherits, TRUE, "try-error"))
 39 |         if (length(errors) > 0L) {
 40 |             # With mc.preschedule = TRUE (the default), if a job fails, the
 41 |             # remaining jobs will fail as well with the same error message.
 42 |             # Therefore, the number of errors does not tell how many errors
 43 |             # actually occurred and only the first error message is forwarded.
 44 |             errorMessage <- attr(res[[errors[1L]]], "condition")[["message"]]
 45 |             stop("in chunk ", errors[1L], " (only first error is shown)", ": ", errorMessage, call. = FALSE)
 46 |         }
 47 |     }
 48 |     return(res)
 49 | }
 50 | 
 51 | chunkedApply <- function(X, MARGIN, FUN, i = seq_len(nrow(X)), j = seq_len(ncol(X)), chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE, ...) {
 52 |     res <- chunkedMap(X = X, FUN = function(chunk, ...) {
 53 |         apply2(X = chunk, MARGIN = MARGIN, FUN = FUN, ...)
 54 |     }, i = i, j = j, chunkBy = MARGIN, chunkSize = chunkSize, nCores = nCores, verbose = verbose, ...)
 55 |     simplifyList(res)
 56 | }
 57 | 
 58 | # A more memory-efficient version of apply.
 59 | #
 60 | # apply always makes a copy of the data.
 61 | apply2 <- function(X, MARGIN, FUN, ...) {
 62 |     d <- dim(X)
 63 |     if (MARGIN == 1L) {
 64 |         subset <- X[1L, ]
 65 |     } else {
 66 |         subset <- X[, 1L]
 67 |     }
 68 |     sample <- FUN(subset, ...)
 69 |     if (is.table(sample)) {
 70 |         stop("tables are not supported.")
 71 |     } else if (is.list(sample)) {
 72 |         # List
 73 |         OUT <- vector(mode = "list", length = d[MARGIN])
 74 |         names(OUT) <- dimnames(X)[[MARGIN]]
 75 |         OUT[[1L]] <- sample
 76 |         if (d[MARGIN] > 1L) {
 77 |             for (i in seq(2L, d[MARGIN])) {
 78 |                 if (MARGIN == 1L) {
 79 |                     subset <- X[i, ]
 80 |                 } else {
 81 |                     subset <- X[, i]
 82 |                 }
 83 |                 OUT[[i]] <- FUN(subset, ...)
 84 |             }
 85 |         }
 86 |     } else {
 87 |         if (length(sample) > 1L) {
 88 |             # Matrix or atomic vector of length > 1
 89 |             OUT <- matrix(data = normalizeType(typeof(sample)), nrow = length(sample), ncol = d[MARGIN])
 90 |             if (!is.matrix(sample) && !is.null(names(sample))) {
 91 |                 if (MARGIN == 1L) {
 92 |                     dimnames(OUT) <- list(NULL, names(sample))
 93 |                 } else {
 94 |                     dimnames(OUT) <- list(names(sample), NULL)
 95 |                 }
 96 |             }
 97 |             OUT[, 1L] <- sample
 98 |             if (d[MARGIN] > 1L) {
 99 |                 for (i in seq(2L, d[MARGIN])) {
100 |                     if (MARGIN == 1L) {
101 |                         subset <- X[i, ]
102 |                     } else {
103 |                         subset <- X[, i]
104 |                     }
105 |                     OUT[, i] <- FUN(subset, ...)
106 |                 }
107 |             }
108 |         } else {
109 |             # Atomic vector of length 1
110 |             OUT <- vector(mode = typeof(sample), length = d[MARGIN])
111 |             names(OUT) <- dimnames(X)[[MARGIN]]
112 |             OUT[1L] <- sample
113 |             if (d[MARGIN] > 1L) {
114 |                 for (i in seq(2L, d[MARGIN])) {
115 |                     if (MARGIN == 1L) {
116 |                         subset <- X[i, ]
117 |                     } else {
118 |                         subset <- X[, i]
119 |                     }
120 |                     OUT[i] <- FUN(subset, ...)
121 |                 }
122 |             }
123 |         }
124 |     }
125 |     return(OUT)
126 | }
127 | 
# Combines a list of per-chunk results into a single object.
#
# If the first element is a matrix, the elements are bound column-wise into
# one matrix that keeps the row names of the first element; otherwise the
# list is flattened with unlist(). An empty list (no chunks) gives NULL, which
# is what unlist() returns for it, instead of failing on x[[1L]].
simplifyList <- function(x) {
    if (length(x) == 0L) {
        return(NULL)
    }
    sample <- x[[1L]]
    if (is.matrix(sample)) {
        x <- matrix(data = unlist(x), nrow = nrow(sample), byrow = FALSE)
        rownames(x) <- rownames(sample)
    } else {
        x <- unlist(x)
    }
    return(x)
}
138 | 


--------------------------------------------------------------------------------
/R/findRelated.R:
--------------------------------------------------------------------------------
 1 | findRelated <- function(x, ...) {
 2 |     UseMethod("findRelated")
 3 | }
 4 | 
 5 | findRelated.matrix <- function(x, cutoff = 0.03, ...) {
 6 |     x[lower.tri(x, diag = TRUE)] <- 0
 7 |     pairs <- which(x > cutoff, arr.ind = TRUE, useNames = FALSE)
 8 |     samples <- unique(pairs[, 1L])
 9 |     rownames(x)[samples]
10 | }
11 | 
# Block-wise version of findRelated for symDMatrix objects.
#
# Visits every block (i, j) with j >= i, collects the pairs whose entry is
# above cutoff, maps them to sample names using the dimnames of the block,
# and returns the unique names found in the first position of the pairs.
findRelated.symDMatrix <- function(x, cutoff = 0.03, verbose = FALSE, ...) {
    n <- nBlocks(x)
    # Finds the related pairs within a single block
    relatedInBlock <- function(i, j) {
        if (verbose) {
            message("Working on block ", i, " ", j)
        }
        block <- x[[i]][[j]][]
        # Remove lower triangle in blocks that contain the diagonal
        if (i == j) {
            block[lower.tri(block, diag = TRUE)] <- 0
        }
        hits <- which(block > cutoff, arr.ind = TRUE, useNames = FALSE)
        # Remap local indices to sample names
        named <- matrix(character(), nrow = nrow(hits), ncol = ncol(hits))
        named[, 1L] <- rownames(block)[hits[, 1L]]
        named[, 2L] <- colnames(block)[hits[, 2L]]
        named
    }
    perBlockRow <- lapply(seq_len(n), function(i) {
        inRow <- lapply(seq(i, n), function(j) relatedInBlock(i, j))
        do.call(rbind, inRow)
    })
    allPairs <- do.call(rbind, perBlockRow)
    unique(allPairs[, 1L])
}
35 | 


--------------------------------------------------------------------------------
/R/getG.R:
--------------------------------------------------------------------------------
  1 | padDigits <- function(x, total) {
  2 |     formatC(x, width = as.integer(log10(total) + 1L), format = "d", flag = "0")
  3 | }
  4 | 
  5 | getG <- function(X, center = TRUE, scale = TRUE, impute = TRUE, scaleG = TRUE, minVar = 1e-05, i = seq_len(nrow(X)), j = seq_len(ncol(X)), i2 = NULL, chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE) {
  6 | 
  7 |     # compute XY' rather than XX'
  8 |     hasY <- !is.null(i2)
  9 | 
 10 |     if (hasY) {
 11 |         if (is.logical(center) && center == TRUE) {
 12 |             stop("centers need to be precomputed.")
 13 |         }
 14 |         if (is.logical(scale) && scale == TRUE) {
 15 |             stop("scales need to be precomputed.")
 16 |         }
 17 |     }
 18 | 
 19 |     i <- convertIndex(X, i, "i")
 20 |     j <- convertIndex(X, j, "j")
 21 |     if (hasY) {
 22 |         i2 <- convertIndex(X, i2, "i")
 23 |     }
 24 | 
 25 |     nX <- nrow(X)
 26 |     pX <- ncol(X)
 27 | 
 28 |     if (min(i) < 1L || max(i) > nX) {
 29 |         stop("Index out of bounds")
 30 |     }
 31 |     if (min(j) < 1L || max(j) > pX) {
 32 |         stop("Index out of bounds")
 33 |     }
 34 |     if (hasY) {
 35 |         if (min(i2) < 1L || max(i2) > nX) {
 36 |             stop("Index out of bounds")
 37 |         }
 38 |     }
 39 | 
 40 |     n <- length(i)
 41 |     p <- length(j)
 42 |     if (hasY) {
 43 |         n2 <- length(i2)
 44 |     }
 45 | 
 46 |     if (is.null(chunkSize)) {
 47 |         chunkSize <- p
 48 |         nChunks <- 1L
 49 |     } else {
 50 |         nChunks <- ceiling(p / chunkSize)
 51 |     }
 52 | 
 53 |     if (hasY) {
 54 |         G <- big.matrix(nrow = n, ncol = n2, type = "double", init = 0.0, dimnames = list(rownames(X)[i], rownames(X)[i2]))
 55 |     } else {
 56 |         G <- big.matrix(nrow = n, ncol = n, type = "double", init = 0.0, dimnames = list(rownames(X)[i], rownames(X)[i]))
 57 |     }
 58 | 
 59 |     mutex <- boost.mutex()
 60 | 
 61 |     chunkApply <- function(curChunk) {
 62 | 
 63 |         if (verbose) {
 64 |             if (nCores > 1) {
 65 |                 message("Process ", Sys.getpid(), ": Chunk ", curChunk, " of ", nChunks, " ...")
 66 |             } else {
 67 |                 message("Chunk ", curChunk, " of ", nChunks, " ...")
 68 |             }
 69 |         }
 70 | 
 71 |         # subset
 72 |         range <- seq(
 73 |             ((curChunk - 1L) * chunkSize) + 1L,
 74 |             min(curChunk * chunkSize, p)
 75 |         )
 76 |         X1 <- X[i, j[range], drop = FALSE]
 77 |         if (hasY) {
 78 |             X2 <- X[i2, j[range], drop = FALSE]
 79 |         }
 80 | 
 81 |         # compute centers
 82 |         if (is.logical(center) && center == TRUE) {
 83 |             center.chunk <- colMeans(X1, na.rm = TRUE)
 84 |         } else if (is.numeric(center)) {
 85 |             center.chunk <- center[j[range]]
 86 |         } else {
 87 |             center.chunk = FALSE
 88 |         }
 89 | 
 90 |         # compute scales
 91 |         if (is.logical(scale) && scale == TRUE) {
 92 |             scale.chunk <- apply(X = X1, MARGIN = 2L, FUN = sd, na.rm = TRUE)
 93 |         } else if (is.numeric(scale)) {
 94 |             scale.chunk <- scale[j[range]]
 95 |         } else {
 96 |             scale.chunk <- FALSE
 97 |         }
 98 | 
 99 |         # remove constant columns
100 |         if (is.numeric(scale.chunk)) {
101 |             removeCols <- which(scale.chunk < minVar)
102 |             if (length(removeCols) > 0L) {
103 |                 X1 <- X1[, -removeCols]
104 |                 if (hasY) {
105 |                     X2 <- X2[, -removeCols]
106 |                 }
107 |                 scale.chunk <- scale.chunk[-removeCols]
108 |                 center.chunk <- center.chunk[-removeCols]
109 |             }
110 |         }
111 | 
112 |         p <- ncol(X1)
113 | 
114 |         # compute XX'
115 |         if (p > 0L) {
116 | 
117 |             # center, scale and impute without duplications
118 |             # set nCores to 1 here because section is already parallelized
119 |             X1 <- preprocess(X1, center = center.chunk, scale = scale.chunk, impute = impute, nCores = 1)
120 |             if (hasY) {
121 |                 X2 <- preprocess(X2, center = center.chunk, scale = scale.chunk, impute = impute, nCores = 1)
122 |             }
123 | 
124 |             if (hasY) {
125 |                 G_chunk <- tcrossprod(x = X1, y = X2)
126 |             } else {
127 |                 G_chunk <- tcrossprod(X1)
128 |             }
129 | 
130 |             lock(mutex)
131 |             G[] <- G[] + G_chunk
132 |             unlock(mutex)
133 | 
134 |         }
135 | 
136 |         return(p)
137 | 
138 |     }
139 | 
140 |     if (nCores == 1L) {
141 |         res <- lapply(X = seq_len(nChunks), FUN = chunkApply)
142 |     } else {
143 |         res <- mclapply(X = seq_len(nChunks), FUN = chunkApply, mc.cores = nCores)
144 |     }
145 | 
146 |     # Convert big.matrix to matrix
147 |     G <- G[]
148 | 
149 |     if (scaleG) {
150 |         if (hasY) {
151 |             K <- do.call(sum, res)
152 |         } else {
153 |             # Use seq instead of diag to avoid copy as it does not increase ref count
154 |             K <- mean(G[seq(from = 1L, to = n * n, by = n + 1L)])
155 |         }
156 |         G[] <- G / K
157 |     }
158 | 
159 |     return(G)
160 | 
161 | }
162 | 
# Compute a genomic relationship matrix block by block and store it on disk as
# a symDMatrix.
#
# Centers and scales requested with TRUE are computed once up front with
# chunkedApply() so that every block is preprocessed with the same values
# (entries for columns outside of j stay at 0 for centers and 1 for scales).
# Only blocks on or above the diagonal are computed with getG() and written as
# ff files into folderOut; blocks below the diagonal are virtual transposes
# (vt) of their mirrored counterparts.
#
# Arguments:
#   X          matrix-like object holding the genotypes
#   center     TRUE to compute column means of the selected rows; any other
#              value is handed to getG() as is
#   scale      TRUE to compute column standard deviations of the selected
#              rows; any other value is handed to getG() as is
#   impute     handed to getG()
#   scaleG     if TRUE, every stored block is divided by the mean of the
#              diagonal of the assembled matrix
#   minVar     handed to getG()
#   blockSize  number of selected rows per block; NULL for a single block
#   folderOut  directory that is created to hold the blocks and G.RData; the
#              function stops if it already exists
#   vmode      vmode of the ff objects
#   i, j       row and column selection, resolved with convertIndex()
#   chunkSize  number of columns per chunk; NULL to use all selected columns
#   nCores     handed to chunkedApply() and getG()
#   verbose    if TRUE, progress messages are emitted
#
# Returns the symDMatrix, which is also saved as G.RData inside folderOut.
getG_symDMatrix <- function(X, center = TRUE, scale = TRUE, impute = TRUE, scaleG = TRUE, minVar = 1e-05, blockSize = 5000L, folderOut = paste0("symDMatrix_", randomString()), vmode = "double", i = seq_len(nrow(X)), j = seq_len(ncol(X)), chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE) {

    i <- convertIndex(X, i, "i")
    j <- convertIndex(X, j, "j")

    nX <- nrow(X)
    pX <- ncol(X)

    if (min(i) < 1L || max(i) > nX) {
        stop("Index out of bounds")
    }
    if (min(j) < 1L || max(j) > pX) {
        stop("Index out of bounds")
    }

    n <- length(i)
    p <- length(j)

    if (is.null(chunkSize)) {
        chunkSize <- p
    }

    # Precompute centers so that all blocks share the same values
    if (isTRUE(center)) {
        if (verbose) {
            message("Computing centers ...")
        }
        center <- rep(0, pX)
        names(center) <- colnames(X)
        center[j] <- chunkedApply(X = X, MARGIN = 2L, FUN = mean, i = i, j = j, chunkSize = chunkSize, nCores = nCores, verbose = FALSE, na.rm = TRUE)
    }

    # Precompute scales so that all blocks share the same values
    if (isTRUE(scale)) {
        if (verbose) {
            message("Computing scales ...")
        }
        scale <- rep(1, pX)
        names(scale) <- colnames(X)
        scale[j] <- chunkedApply(X = X, MARGIN = 2L, FUN = sd, i = i, j = j, chunkSize = chunkSize, nCores = nCores, verbose = FALSE, na.rm = TRUE)
    }

    if (file.exists(folderOut)) {
        stop(folderOut, " already exists")
    }
    dir.create(folderOut)

    if (is.null(blockSize)) {
        blockSize <- n
        nBlocks <- 1L
    } else {
        nBlocks <- ceiling(n / blockSize)
    }

    # Consecutive runs of blockSize selected rows form one block each
    blockIndices <- split(i, ceiling(seq_along(i) / blockSize))
    blockRows <- vector(mode = "list", length = nBlocks)
    for (rowIndex in seq_len(nBlocks)) {
        rowBlocks <- vector(mode = "list", length = nBlocks)
        for (colIndex in seq_len(nBlocks)) {
            if (verbose) {
                message("Block ", rowIndex, "-", colIndex, " ...")
            }
            if (colIndex >= rowIndex) {
                # Diagonal or upper-triangle block: compute and write to disk.
                # scaleG is deferred until all blocks are available.
                blockName <- paste0("data_", padDigits(rowIndex, nBlocks), "_", padDigits(colIndex, nBlocks), ".bin")
                block <- as.ff(getG(X, center = center, scale = scale, impute = impute, scaleG = FALSE, minVar = minVar, i = blockIndices[[rowIndex]], j = j, i2 = blockIndices[[colIndex]], chunkSize = chunkSize, nCores = nCores, verbose = FALSE), filename = file.path(folderOut, blockName), vmode = vmode)
                # Change ff path to a relative one
                physical(block)[["filename"]] <- blockName
                rowBlocks[[colIndex]] <- block
            } else {
                # Lower-triangle block: reuse the mirrored block that was
                # computed in an earlier row as a virtual transpose
                rowBlocks[[colIndex]] <- vt(blockRows[[colIndex]][[rowIndex]])
            }
        }
        blockRows[[rowIndex]] <- do.call(ColumnLinkedMatrix, rowBlocks)
    }

    G <- do.call(symDMatrix, blockRows)

    if (scaleG) {
        K <- mean(diag(G))
        # Only the blocks on or above the diagonal are rescaled; the blocks
        # below the diagonal were created with vt() from these blocks
        for (rowIndex in seq_len(nBlocks)) {
            for (colIndex in seq(rowIndex, nBlocks)) {
                G[[rowIndex]][[colIndex]][] <- G[[rowIndex]][[colIndex]][] / K
            }
        }
    }

    save(G, file = file.path(folderOut, "G.RData"))

    return(G)

}
257 | 


--------------------------------------------------------------------------------
/R/preprocess.R:
--------------------------------------------------------------------------------
 1 | preprocess <- function(X, center = FALSE, scale = FALSE, impute = FALSE, nCores = getOption("mc.cores", 2L)) {
 2 |     if (!(is.numeric(X) && length(dim(X)) == 2)) {
 3 |         stop("'X' needs to be a numeric matrix")
 4 |     }
 5 |     if (!(is.logical(center) && length(center) == 1L) && !(is.numeric(center) && length(center) == ncol(X))) {
 6 |         stop("'center' needs to be either a logical vector of size 1 or a numeric vector of size 'ncol(X)'")
 7 |     }
 8 |     if (!(is.logical(scale) && length(scale) == 1L) && !(is.numeric(scale) && length(scale) == ncol(X))) {
 9 |         stop("'scale' needs to be either a logical vector of size 1 or a numeric vector of size 'ncol(X)'")
10 |     }
11 |     if (!(is.logical(impute) && length(impute) == 1L)) {
12 |         stop("'impute' needs to be a logical vector of size 1")
13 |     }
14 |     if (!(is.numeric(nCores) && nCores > 0L)) {
15 |         stop("'nCores' needs to be a positive number")
16 |     }
17 |     .Call(C_preprocess, X, center, scale, impute, as.integer(nCores))
18 | }
19 | 


--------------------------------------------------------------------------------
/R/segments.R:
--------------------------------------------------------------------------------
  1 | segments <- function(statistic, chr, bp, threshold, gap, trim = FALSE, verbose = FALSE,snpid=NULL) {
  2 |     if (length(unique(c(length(statistic), length(chr), length(bp)))) != 1) {
  3 |         stop("statistic, chr, and bp need to match in length")
  4 |     }
  5 |     if (!is.numeric(statistic)) {
  6 |         stop("'statistic' needs to be a numeric vector")
  7 |     }
  8 |     if (!(is.numeric(chr) || is.character(chr))) {
  9 |         stop("'chr' needs to be a either a character or numeric vector")
 10 |     }
 11 |     if (!is.numeric(bp)) {
 12 |         stop("'bp' needs to be a numeric vector")
 13 |     }
 14 |     if (!is.numeric(threshold)) {
 15 |         stop("'threshold' needs to a number")
 16 |     }
 17 |     if (!is.numeric(gap)) {
 18 |         stop("'gap' needs to a number")
 19 |     }
 20 |     uniqueChr <- unique(chr)
 21 |     out <- vector(mode = "list", length = length(uniqueChr))
 22 |     for (curChr in uniqueChr) {
 23 |         if (verbose) {
 24 |             message("Working on chromosome ", curChr)
 25 |         }
 26 |         # Extract chromosome data
 27 |         chrFilter <- which(chr == curChr)
 28 |         statisticChr <- statistic[chrFilter]
 29 |         bpChr <- bp[chrFilter]
 30 |         # Determine variants below threshold
 31 |         discoverySet <- which(statisticChr <= threshold)
 32 |         # Set discoveries and all variants within +/- gap to 1, leave rest as 0
 33 |         signal <- rep(0, length(chrFilter))
 34 |         for (discovery in discoverySet) {
 35 |             signal[abs(bpChr - bpChr[discovery]) <= gap] <- 1
 36 |         }
 37 |         # Determine the runs in the 0/1 signal
 38 |         runs <- rle(signal)
 39 |         # Determine at what positions within the chromosome the runs start and
 40 |         # end while removing 0-runs
 41 |         runStart <- c(1, cumsum(runs[["lengths"]][-length(runs[["lengths"]])]) + 1)
 42 |         withinSegment <- runs[["values"]] == 1
 43 |         runStart <- runStart[withinSegment]
 44 |         runEnd <- runStart + runs[["lengths"]][withinSegment] - 1
 45 |         runLength <- runs[["lengths"]][withinSegment]
 46 |         # Determine value and position of smallest variant within segment, and
 47 |         # optionally trim segment (i.e., remove variants that are not internal
 48 |         # to the segment containing GWAS-significant variants)
 49 |         # Would be nice to vectorize this like the other operations ...
 50 |         minValue <- vector(mode = "numeric", length = length(runStart))
 51 |         minValuePos <- vector(mode = "integer", length = length(runStart))
 52 |         for (curSeg in seq_along(runStart)) {
 53 |             segFilter <- seq(runStart[curSeg], runEnd[curSeg])
 54 |             statisticSeq <- statisticChr[segFilter]
 55 |             minValuePosSeg <- which.min(statisticSeq)
 56 |             minValue[curSeg] <- statisticSeq[minValuePosSeg]
 57 |             minValuePos[curSeg] <- chrFilter[1] + segFilter[1] + minValuePosSeg - 2
 58 |             if (trim) {
 59 |                 # Determine which variants in the segment passed the threshold
 60 |                 significantVariants <- which(statisticSeq <= threshold)
 61 |                 # Set start of run to first significant variant and end of run
 62 |                 # to last significant variant
 63 |                 runStart[curSeg] <- segFilter[significantVariants[1]]
 64 |                 runEnd[curSeg] <- segFilter[significantVariants[length(significantVariants)]]
 65 |                 runLength[curSeg] <- runEnd[curSeg] - runStart[curSeg] + 1
 66 |             }
 67 |         }
 68 |         # Determine at what base-pair positions the runs start and end
 69 |         bpStart <- bpChr[runStart]
 70 |         bpEnd <- bpChr[runEnd]
 71 |         bpLength <- bpEnd - bpStart + 1
 72 |         # Determine at what positions within x the runs start and end (more
 73 |         # useful information than chromosome by chromosome because it is easier
 74 |         # to extract)
 75 |         xStart <- chrFilter[runStart]
 76 |         xEnd <- chrFilter[runEnd]
 77 |         # Prepare chromosome summary (there might be no segments, so do not
 78 |         # rely on recycling)
 79 |         outChr <- data.frame(
 80 |             chr = rep(curChr, times = length(runStart)),
 81 |             start = xStart,
 82 |             end = xEnd,
 83 |             length = runLength,
 84 |             bpStart = bpStart,
 85 |             bpEnd = bpEnd,
 86 |             bpLength = bpLength,
 87 |             minValue = minValue,
 88 |             minValuePos = minValuePos,
 89 |             minValueBp=bp[minValuePos]
 90 |         )
 91 |         if(!is.null(snpid)){
 92 |             outChr$leadSNP=snpid[minValuePos]
 93 |         }
 94 |         out[[curChr]] <- outChr
 95 |     }
 96 |     # Combine chromosomes
 97 |     out <- do.call(rbind, out)
 98 |     rownames(out) <- NULL
 99 |     return(out)
100 | }
101 | 


--------------------------------------------------------------------------------
/R/summarize.R:
--------------------------------------------------------------------------------
 1 | summarize <- function(X, i = seq_len(nrow(X)), j = seq_len(ncol(X)), chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE) {
 2 |     res <- chunkedMap(X = X, FUN = function(chunk) {
 3 |         summaries <- .Call(C_summarize, chunk)
 4 |         rownames(summaries) <- colnames(chunk)
 5 |         colnames(summaries) <- c("freq_na", "allele_freq", "sd")
 6 |         return(summaries)
 7 |     }, i = i, j = j, chunkSize = chunkSize, nCores = nCores, verbose = verbose)
 8 |     res <- do.call(rbind, res)
 9 |     as.data.frame(res)
10 | }
11 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | getLineCount <- function(path, header) {
 2 |     file <- file(path, open = "r")
 3 |     n <- 0L
 4 |     while (length(readLines(file, n = 1L)) > 0L) {
 5 |         n <- n + 1L
 6 |     }
 7 |     if (header) {
 8 |         n <- n - 1L
 9 |     }
10 |     close(file)
11 |     return(n)
12 | }
13 | 
# Read the fields of the first line of a text file.
#
# Arguments:
#   path  path to the file
#   sep   field separator handed to scan(); the default "" splits on
#         whitespace
#
# Returns a character vector with one element per field.
getFileHeader <- function(path, sep = "") {
    con <- file(path, open = "r")
    # Close the connection even if scan() fails
    on.exit(close(con), add = TRUE)
    header <- scan(con, nlines = 1L, what = character(), sep = sep, quiet = TRUE)
    return(header)
}
20 | 
# Count the fields in the first line of a text file, as split by
# getFileHeader() with the given separator.
getColumnCount <- function(path, sep = "") {
    length(getFileHeader(path, sep))
}
26 | 
# Generate a random string of five characters drawn with replacement from the
# digits 0-9 and the lower- and upper-case letters.
randomString <- function() {
    alphabet <- c(0L:9L, letters, LETTERS)
    drawn <- sample(alphabet, size = 5L, replace = TRUE)
    paste(drawn, collapse = "")
}
30 | 
# Turn a type given by name into an empty vector of that type.
#
# A non-empty character value is treated as a type name and handed to
# vector(mode = ...); if that fails, a warning is raised and an empty
# character vector is returned instead. Any other value does not carry a type
# name and is returned unchanged.
normalizeType <- function(val) {
    namesType <- typeof(val) == "character" && length(val) > 0L
    if (!namesType) {
        # value doesn't contain type information and can be handled by typeof
        return(val)
    }
    # convert to type if type and value match
    prototype <- try(vector(mode = val), silent = TRUE)
    if (!inherits(prototype, "try-error")) {
        return(prototype)
    }
    # fall back to a character type if conversion failed
    warning("could no convert type, using character instead")
    character()
}
50 | 
# Load the example dataset shipped in the extdata folder of the package.
#
# The .bed files of chr1, chr2, and chr3 are opened as BEDMatrix objects
# (with their messages suppressed), linked by columns, and converted with
# as.BGData() using pheno.txt as alternate phenotype file.
loadExample <- function() {
    path <- system.file("extdata", package = "BGData")
    message("Loading chromosomes as .bed files...")
    chromosomes <- c("chr1", "chr2", "chr3")
    # One BEDMatrix per chromosome
    nodes <- lapply(chromosomes, function(chr) {
        suppressMessages(BEDMatrix(paste0(path, "/", chr)))
    })
    linked <- do.call(ColumnLinkedMatrix, nodes)
    as.BGData(linked, alternatePhenotypeFile = paste0(path, "/pheno.txt"))
}
59 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | BGData: A Suite of Packages for Analysis of Big Genomic Data
  2 | ============================================================
  3 | 
  4 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/BGData)](https://CRAN.R-project.org/package=BGData)
  5 | 
  6 | BGData ([Grueneberg & de los Campos, 2019](https://doi.org/10.1534/g3.119.400018)) is an R package that provides scalable and efficient computational methods for large genomic datasets, e.g., genome-wide association studies (GWAS) or genomic relationship matrices (G matrices). It also contains a container class called `BGData` that holds genotypes, sample information, and variant information.
  7 | 
  8 | Modern genomic datasets are big (large *n*), high-dimensional (large *p*), and multi-layered. The challenges that need to be addressed are memory requirements and computational demands. Our goal is to develop software that will enable researchers to carry out analyses with big genomic data within the R environment.
  9 | 
 10 | We have identified several approaches to tackle those challenges within R:
 11 | 
 12 | - File-backed matrices: The data is stored on the hard drive and users can read in smaller chunks when they are needed.
 13 | - Linked arrays: For very large datasets a single file-backed array may not be enough or convenient. A linked array is an array whose content is distributed over multiple file-backed nodes.
 14 | - Multiple dispatch: Methods are presented to users so that they can treat these arrays pretty much as if they were RAM arrays.
 15 | - Multi-level parallelism: Exploit multi-core and multi-node computing.
 16 | - Inputs: Users can create these arrays from standard formats (e.g., PLINK .bed).
 17 | 
 18 | The BGData package is an umbrella package that comprises several packages: [BEDMatrix](https://CRAN.R-project.org/package=BEDMatrix), [LinkedMatrix](https://CRAN.R-project.org/package=LinkedMatrix), and [symDMatrix](https://CRAN.R-project.org/package=symDMatrix).
 19 | 
 20 | 
 21 | Examples
 22 | --------
 23 | 
 24 | ### Loading the package
 25 | 
 26 | Load the BGData package:
 27 | 
 28 | ```R
 29 | library(BGData)
 30 | ```
 31 | 
 32 | ### Inspecting the example dataset
 33 | 
 34 | The `inst/extdata` folder contains example files that were generated from the 250k SNP and phenotype data in [Atwell et al. (2010)](https://doi.org/10.1038/nature08800). Only the first 300 SNPs of chromosomes 1, 2, and 3 were included to keep the size of the example dataset small enough for CRAN. [PLINK](https://www.cog-genomics.org/plink2) was used to convert the data to [.bed](https://www.cog-genomics.org/plink2/input#bed) and [.raw](https://www.cog-genomics.org/plink2/input#raw) files. `FT10` has been chosen as a phenotype and is provided as an [alternate phenotype file](https://www.cog-genomics.org/plink2/input#pheno). The file is intentionally shuffled to demonstrate that the additional phenotypes are put in the same order as the rest of the phenotypes.
 35 | 
 36 | ```R
 37 | path <- system.file("extdata", package = "BGData")
 38 | list.files(path)
 39 | #>  [1] "chr1.bed"  "chr1.bim"  "chr1.fam"  "chr1.raw"  "chr2.bed"  "chr2.bim"
 40 | #>  [7] "chr2.fam"  "chr2.raw"  "chr3.bed"  "chr3.bim"  "chr3.fam"  "chr3.raw"
 41 | #> [13] "pheno.txt"
 42 | ```
 43 | 
 44 | ### Loading example dataset
 45 | 
 46 | #### Loading individual PLINK .bed files
 47 | 
 48 | Load the .bed file for chromosome 1 (chr1.bed) using the [BEDMatrix](https://CRAN.R-project.org/package=BEDMatrix) package:
 49 | 
 50 | ```R
 51 | chr1 <- BEDMatrix(paste0(path, "/chr1.bed"))
 52 | #> Extracting number of individuals and rownames from .fam file...
 53 | #> Extracting number of markers and colnames from .bim file...
 54 | ```
 55 | 
 56 | `BEDMatrix` objects behave similarly to regular matrices:
 57 | 
 58 | ```R
 59 | dim(chr1)
 60 | #> [1] 199 300
 61 | rownames(chr1)[1:10]
 62 | #> [1] "5837_5837" "6008_6008" "6009_6009" "6016_6016" "6040_6040" "6042_6042"
 63 | #> [7] "6043_6043" "6046_6046" "6064_6064" "6074_6074"
 64 | colnames(chr1)[1:10]
 65 | #> [1] "snp1_T"  "snp2_G"  "snp3_A"  "snp4_T"  "snp5_G"  "snp6_T"  "snp7_C"
 66 | #> [8] "snp8_C"  "snp9_C"  "snp10_G"
 67 | chr1["6008_6008", "snp5_G"]
 68 | #> [1] 0
 69 | ```
 70 | 
 71 | #### Linking multiple BEDMatrix objects together
 72 | 
 73 | Load the other two .bed files:
 74 | 
 75 | ```R
 76 | chr2 <- BEDMatrix(paste0(path, "/chr2.bed"))
 77 | #> Extracting number of individuals and rownames from .fam file...
 78 | #> Extracting number of markers and colnames from .bim file...
 79 | chr3 <- BEDMatrix(paste0(path, "/chr3.bed"))
 80 | #> Extracting number of individuals and rownames from .fam file...
 81 | #> Extracting number of markers and colnames from .bim file...
 82 | ```
 83 | 
 84 | Combine the BEDMatrix objects by columns using the [LinkedMatrix](https://CRAN.R-project.org/package=LinkedMatrix) package to avoid the inconvenience of having three separate matrices:
 85 | 
 86 | ```R
 87 | wg <- ColumnLinkedMatrix(chr1, chr2, chr3)
 88 | ```
 89 | 
 90 | Just like `BEDMatrix` objects, `LinkedMatrix` objects also behave similarly to regular matrices:
 91 | 
 92 | ```R
 93 | dim(wg)
 94 | #> [1] 199 900
 95 | rownames(wg)[1:10]
 96 | #> [1] "5837_5837" "6008_6008" "6009_6009" "6016_6016" "6040_6040" "6042_6042"
 97 | #> [7] "6043_6043" "6046_6046" "6064_6064" "6074_6074"
 98 | colnames(wg)[1:10]
 99 | #> [1] "snp1_T"  "snp2_G"  "snp3_A"  "snp4_T"  "snp5_G"  "snp6_T"  "snp7_C"
100 | #> [8] "snp8_C"  "snp9_C"  "snp10_G"
101 | wg["6008_6008", "snp5_G"]
102 | #> [1] 0
103 | ```
104 | 
105 | ### Creating a BGData object
106 | 
107 | `BGData` objects can be created from individual `BEDMatrix` objects or a collection of `BEDMatrix` objects as a `LinkedMatrix` object using the `as.BGData()` function. This will read the .fam and .bim file that comes with the .bed files. The `alternatePhenotypeFile` parameter points to the file that contains the `FT10` phenotype:
108 | 
109 | ```R
110 | bg <- as.BGData(wg, alternatePhenotypeFile = paste0(path, "/pheno.txt"))
111 | #> Extracting phenotypes from .fam file, assuming that the .fam file of the first BEDMatrix instance is representative of all the other nodes...
112 | #> Extracting map from .bim files...
113 | #> Merging alternate phenotype file...
114 | ```
115 | 
116 | The `bg` object will use the `LinkedMatrix` object as genotypes, the .fam file augmented by the `FT10` phenotype as sample information, and the .bim file as variant information.
117 | 
118 | ```R
119 | str(bg)
120 | #> Formal class 'BGData' [package "BGData"] with 3 slots
121 | #>   ..@ geno :Formal class 'ColumnLinkedMatrix' [package "LinkedMatrix"] with 1 slot
122 | #>   .. .. ..@ .Data:List of 3
123 | #>   .. .. .. ..$ :BEDMatrix: 199 x 300 [/home/agrueneberg/.pkgs/R/BGData/extdata/chr1.bed]
124 | #>   .. .. .. ..$ :BEDMatrix: 199 x 300 [/home/agrueneberg/.pkgs/R/BGData/extdata/chr2.bed]
125 | #>   .. .. .. ..$ :BEDMatrix: 199 x 300 [/home/agrueneberg/.pkgs/R/BGData/extdata/chr3.bed]
126 | #>   ..@ pheno:'data.frame':       199 obs. of  7 variables:
127 | #>   .. ..$ FID      : int [1:199] 5837 6008 6009 6016 6040 6042 6043 6046 6064 6074 ...
128 | #>   .. ..$ IID      : int [1:199] 5837 6008 6009 6016 6040 6042 6043 6046 6064 6074 ...
129 | #>   .. ..$ PAT      : int [1:199] 0 0 0 0 0 0 0 0 0 0 ...
130 | #>   .. ..$ MAT      : int [1:199] 0 0 0 0 0 0 0 0 0 0 ...
131 | #>   .. ..$ SEX      : int [1:199] 0 0 0 0 0 0 0 0 0 0 ...
132 | #>   .. ..$ PHENOTYPE: int [1:199] -9 -9 -9 -9 -9 -9 -9 -9 -9 -9 ...
133 | #>   .. ..$ FT10     : num [1:199] 57 60 98 75 71 56 90 93 96 91 ...
134 | #>   ..@ map  :'data.frame':       900 obs. of  6 variables:
135 | #>   .. ..$ chromosome        : int [1:900] 1 1 1 1 1 1 1 1 1 1 ...
136 | #>   .. ..$ snp_id            : chr [1:900] "snp1" "snp2" "snp3" "snp4" ...
137 | #>   .. ..$ genetic_distance  : int [1:900] 0 0 0 0 0 0 0 0 0 0 ...
138 | #>   .. ..$ base_pair_position: int [1:900] 657 3102 4648 4880 5975 6063 6449 6514 6603 6768 ...
139 | #>   .. ..$ allele_1          : chr [1:900] "T" "G" "A" "T" ...
140 | #>   .. ..$ allele_2          : chr [1:900] "C" "A" "C" "C" ...
141 | ```
142 | 
143 | ### Saving a BGData object
144 | 
145 | A BGData object can be saved like any other R object using the `save` function:
146 | 
147 | ```R
148 | save(bg, file = "BGData.RData")
149 | ```
150 | 
151 | ### Loading a BGData object
152 | 
153 | The genotypes in a `BGData` object can be of various types, some of which need to be initialized in a particular way. The `load.BGData` function takes care of reloading a saved BGData object properly:
154 | 
155 | ```R
156 | load.BGData("BGData.RData")
157 | #> Loaded objects: bg
158 | ```
159 | 
160 | ### Summarizing data
161 | 
162 | Use `chunkedApply` to count missing values (among others):
163 | 
164 | ```R
165 | countNAs <- chunkedApply(X = geno(bg), MARGIN = 2, FUN = function(x) sum(is.na(x)))
166 | ```
167 | 
168 | Use the `summarize` function to calculate minor allele frequencies and frequency of missing values:
169 | 
170 | ```R
171 | summarize(geno(bg))
172 | ```
173 | 
174 | ### Running GWASes with different regression methods
175 | 
176 | A data structure for genomic data is useful when defining methods that act on both phenotype and genotype information. We have implemented a `GWAS` function that supports various regression methods. The formula takes phenotypes from the sample information of the `BGData` object and inserts one marker at a time.
177 | 
178 | ```R
179 | gwas <- GWAS(formula = FT10 ~ 1, data = bg)
180 | ```
181 | 
182 | ### Generating the G Matrix
183 | 
184 | ```R
185 | G <- getG(geno(bg))
186 | ```
187 | 
188 | 
189 | Installation
190 | ------------
191 | 
192 | Install the stable version from CRAN:
193 | 
194 | ```R
195 | install.packages("BGData")
196 | ```
197 | 
198 | Alternatively, install the development version from GitHub:
199 | 
200 | ```R
201 | # install.packages("remotes")
202 | remotes::install_github("QuantGen/BGData")
203 | ```
204 | 
205 | 
206 | Documentation
207 | -------------
208 | 
209 | Further documentation can be found on [RDocumentation](https://www.rdocumentation.org/packages/BGData).
210 | 
211 | 
212 | Contributing
213 | ------------
214 | 
215 | - Issue Tracker: https://github.com/QuantGen/BGData/issues
216 | - Source Code: https://github.com/QuantGen/BGData
217 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | bibentry(
 2 |     bibtype = "Article",
 3 |     textVersion = "Grueneberg, A., and G. de los Campos, 2019 BGData - A Suite of R Packages for Genomic Analysis with Big Data. G3: Genes, Genomes, Genetics 9(5): 1377-1383.",
 4 |     title = "BGData - A Suite of R Packages for Genomic Analysis with Big Data",
 5 |     author = c(
 6 |         person("Alexander", "Grueneberg"),
 7 |         person("Gustavo", "de los Campos")
 8 |     ),
 9 |     journal = "G3: Genes, Genomes, Genetics",
10 |     year = "2019",
11 |     volume = "9",
12 |     number = "5",
13 |     pages = "1377--1383",
14 |     doi = "10.1534/g3.119.400018",
15 |     url = "https://doi.org/10.1534/g3.119.400018"
16 | )
17 | 


--------------------------------------------------------------------------------
/inst/extdata/chr1.bed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantGen/BGData/fe7a57779d903f7056d5841482c9afdbeae5744b/inst/extdata/chr1.bed


--------------------------------------------------------------------------------
/inst/extdata/chr1.bim:
--------------------------------------------------------------------------------
  1 | 1	snp1	0	657	T	C
  2 | 1	snp2	0	3102	G	A
  3 | 1	snp3	0	4648	A	C
  4 | 1	snp4	0	4880	T	C
  5 | 1	snp5	0	5975	G	T
  6 | 1	snp6	0	6063	T	C
  7 | 1	snp7	0	6449	C	T
  8 | 1	snp8	0	6514	C	T
  9 | 1	snp9	0	6603	C	T
 10 | 1	snp10	0	6768	G	A
 11 | 1	snp11	0	7601	T	C
 12 | 1	snp12	0	8193	A	G
 13 | 1	snp13	0	8617	A	G
 14 | 1	snp14	0	10219	A	T
 15 | 1	snp15	0	10449	T	A
 16 | 1	snp16	0	10969	G	A
 17 | 1	snp17	0	11493	A	G
 18 | 1	snp18	0	11696	A	C
 19 | 1	snp19	0	12584	A	C
 20 | 1	snp20	0	12659	C	A
 21 | 1	snp21	0	13045	C	T
 22 | 1	snp22	0	14385	A	T
 23 | 1	snp23	0	19819	T	A
 24 | 1	snp24	0	20892	C	A
 25 | 1	snp25	0	21043	T	A
 26 | 1	snp26	0	21128	T	A
 27 | 1	snp27	0	22522	G	C
 28 | 1	snp28	0	23838	T	A
 29 | 1	snp29	0	25315	G	A
 30 | 1	snp30	0	25365	T	C
 31 | 1	snp31	0	25773	T	C
 32 | 1	snp32	0	26288	T	A
 33 | 1	snp33	0	27265	C	T
 34 | 1	snp34	0	28948	T	C
 35 | 1	snp35	0	28978	A	C
 36 | 1	snp36	0	29291	A	G
 37 | 1	snp37	0	30529	A	G
 38 | 1	snp38	0	30683	T	A
 39 | 1	snp39	0	31515	G	A
 40 | 1	snp40	0	31926	A	G
 41 | 1	snp41	0	32210	T	C
 42 | 1	snp42	0	32807	A	G
 43 | 1	snp43	0	34125	A	G
 44 | 1	snp44	0	34599	C	G
 45 | 1	snp45	0	35856	C	T
 46 | 1	snp46	0	37072	G	T
 47 | 1	snp47	0	38946	C	A
 48 | 1	snp48	0	39751	T	G
 49 | 1	snp49	0	41178	G	A
 50 | 1	snp50	0	41427	A	G
 51 | 1	snp51	0	41887	G	A
 52 | 1	snp52	0	44567	C	A
 53 | 1	snp53	0	45075	T	A
 54 | 1	snp54	0	45105	C	G
 55 | 1	snp55	0	45580	A	T
 56 | 1	snp56	0	45683	T	C
 57 | 1	snp57	0	46373	C	T
 58 | 1	snp58	0	46499	A	G
 59 | 1	snp59	0	46912	A	G
 60 | 1	snp60	0	46935	C	T
 61 | 1	snp61	0	47577	G	A
 62 | 1	snp62	0	47692	G	T
 63 | 1	snp63	0	48118	A	G
 64 | 1	snp64	0	48181	T	C
 65 | 1	snp65	0	49080	C	T
 66 | 1	snp66	0	51392	A	C
 67 | 1	snp67	0	51706	A	T
 68 | 1	snp68	0	51878	C	A
 69 | 1	snp69	0	52202	C	A
 70 | 1	snp70	0	53183	T	G
 71 | 1	snp71	0	53729	T	C
 72 | 1	snp72	0	53901	A	G
 73 | 1	snp73	0	55684	C	T
 74 | 1	snp74	0	57136	T	G
 75 | 1	snp75	0	57686	C	A
 76 | 1	snp76	0	59637	A	G
 77 | 1	snp77	0	60083	G	T
 78 | 1	snp78	0	60772	C	G
 79 | 1	snp79	0	61122	T	A
 80 | 1	snp80	0	61266	T	A
 81 | 1	snp81	0	61405	T	A
 82 | 1	snp82	0	61661	T	C
 83 | 1	snp83	0	62259	A	T
 84 | 1	snp84	0	62935	G	C
 85 | 1	snp85	0	63084	T	A
 86 | 1	snp86	0	63645	C	G
 87 | 1	snp87	0	63759	G	A
 88 | 1	snp88	0	63915	G	C
 89 | 1	snp89	0	64149	G	A
 90 | 1	snp90	0	64651	G	A
 91 | 1	snp91	0	68340	T	G
 92 | 1	snp92	0	68880	A	G
 93 | 1	snp93	0	69311	C	G
 94 | 1	snp94	0	70933	C	T
 95 | 1	snp95	0	71326	G	T
 96 | 1	snp96	0	71348	T	C
 97 | 1	snp97	0	71868	A	C
 98 | 1	snp98	0	72138	A	T
 99 | 1	snp99	0	72756	G	C
100 | 1	snp100	0	72894	T	C
101 | 1	snp101	0	72924	A	G
102 | 1	snp102	0	73047	T	G
103 | 1	snp103	0	73467	T	G
104 | 1	snp104	0	73691	T	C
105 | 1	snp105	0	73851	A	G
106 | 1	snp106	0	73989	A	G
107 | 1	snp107	0	74169	G	C
108 | 1	snp108	0	74707	T	C
109 | 1	snp109	0	75481	T	C
110 | 1	snp110	0	75721	T	C
111 | 1	snp111	0	75899	C	T
112 | 1	snp112	0	76188	C	G
113 | 1	snp113	0	76217	T	G
114 | 1	snp114	0	76847	G	A
115 | 1	snp115	0	76879	C	T
116 | 1	snp116	0	76906	A	G
117 | 1	snp117	0	77127	T	C
118 | 1	snp118	0	77140	A	G
119 | 1	snp119	0	77243	A	G
120 | 1	snp120	0	77458	T	A
121 | 1	snp121	0	78803	A	T
122 | 1	snp122	0	78975	A	T
123 | 1	snp123	0	79418	G	C
124 | 1	snp124	0	80216	T	C
125 | 1	snp125	0	80374	C	A
126 | 1	snp126	0	80400	T	G
127 | 1	snp127	0	80850	A	T
128 | 1	snp128	0	81068	T	A
129 | 1	snp129	0	81496	G	C
130 | 1	snp130	0	81854	T	C
131 | 1	snp131	0	81869	T	C
132 | 1	snp132	0	82197	C	T
133 | 1	snp133	0	82290	T	C
134 | 1	snp134	0	83117	G	C
135 | 1	snp135	0	83177	A	C
136 | 1	snp136	0	83219	C	T
137 | 1	snp137	0	84144	G	A
138 | 1	snp138	0	84379	T	A
139 | 1	snp139	0	84558	G	C
140 | 1	snp140	0	85561	C	A
141 | 1	snp141	0	85860	C	G
142 | 1	snp142	0	86656	C	T
143 | 1	snp143	0	87060	A	G
144 | 1	snp144	0	87791	C	G
145 | 1	snp145	0	87985	T	C
146 | 1	snp146	0	88300	C	T
147 | 1	snp147	0	88658	A	G
148 | 1	snp148	0	89312	G	T
149 | 1	snp149	0	90606	G	T
150 | 1	snp150	0	92353	T	C
151 | 1	snp151	0	92507	T	A
152 | 1	snp152	0	92866	T	C
153 | 1	snp153	0	93562	G	T
154 | 1	snp154	0	93740	T	C
155 | 1	snp155	0	93800	A	G
156 | 1	snp156	0	95018	G	C
157 | 1	snp157	0	95225	G	A
158 | 1	snp158	0	95386	C	A
159 | 1	snp159	0	95749	C	G
160 | 1	snp160	0	95962	A	T
161 | 1	snp161	0	96521	T	C
162 | 1	snp162	0	96726	T	C
163 | 1	snp163	0	96770	A	C
164 | 1	snp164	0	97473	T	C
165 | 1	snp165	0	98317	G	T
166 | 1	snp166	0	98617	T	C
167 | 1	snp167	0	99001	T	C
168 | 1	snp168	0	99456	A	G
169 | 1	snp169	0	99757	A	G
170 | 1	snp170	0	99785	G	A
171 | 1	snp171	0	99805	A	C
172 | 1	snp172	0	100267	A	T
173 | 1	snp173	0	100314	C	T
174 | 1	snp174	0	100501	G	A
175 | 1	snp175	0	100832	G	C
176 | 1	snp176	0	103175	G	C
177 | 1	snp177	0	103453	T	G
178 | 1	snp178	0	103654	G	C
179 | 1	snp179	0	103749	T	G
180 | 1	snp180	0	103963	T	A
181 | 1	snp181	0	105029	A	C
182 | 1	snp182	0	105282	G	A
183 | 1	snp183	0	105894	C	G
184 | 1	snp184	0	107585	T	C
185 | 1	snp185	0	109472	T	C
186 | 1	snp186	0	109627	T	C
187 | 1	snp187	0	112907	A	G
188 | 1	snp188	0	113076	A	G
189 | 1	snp189	0	114187	T	G
190 | 1	snp190	0	114648	T	C
191 | 1	snp191	0	114737	C	A
192 | 1	snp192	0	114915	C	T
193 | 1	snp193	0	115320	G	T
194 | 1	snp194	0	117199	C	A
195 | 1	snp195	0	117379	T	A
196 | 1	snp196	0	118192	C	A
197 | 1	snp197	0	118441	A	G
198 | 1	snp198	0	119003	T	C
199 | 1	snp199	0	120328	G	C
200 | 1	snp200	0	120376	G	T
201 | 1	snp201	0	120456	G	A
202 | 1	snp202	0	120787	T	C
203 | 1	snp203	0	122005	G	C
204 | 1	snp204	0	122503	G	A
205 | 1	snp205	0	122809	A	G
206 | 1	snp206	0	123313	C	G
207 | 1	snp207	0	123924	A	T
208 | 1	snp208	0	125155	G	A
209 | 1	snp209	0	126251	A	G
210 | 1	snp210	0	127093	T	C
211 | 1	snp211	0	127891	T	C
212 | 1	snp212	0	128116	A	G
213 | 1	snp213	0	128366	A	G
214 | 1	snp214	0	129025	T	C
215 | 1	snp215	0	131281	T	C
216 | 1	snp216	0	132276	G	A
217 | 1	snp217	0	132773	A	C
218 | 1	snp218	0	132898	A	G
219 | 1	snp219	0	133144	T	G
220 | 1	snp220	0	133186	C	T
221 | 1	snp221	0	133607	C	A
222 | 1	snp222	0	133626	C	A
223 | 1	snp223	0	133701	G	A
224 | 1	snp224	0	133794	G	A
225 | 1	snp225	0	134063	C	A
226 | 1	snp226	0	135519	G	A
227 | 1	snp227	0	136188	C	T
228 | 1	snp228	0	136211	T	A
229 | 1	snp229	0	136399	T	G
230 | 1	snp230	0	136467	T	C
231 | 1	snp231	0	136773	A	G
232 | 1	snp232	0	137104	C	T
233 | 1	snp233	0	137134	A	G
234 | 1	snp234	0	137545	A	G
235 | 1	snp235	0	138060	C	T
236 | 1	snp236	0	138335	A	G
237 | 1	snp237	0	138627	T	C
238 | 1	snp238	0	138875	C	A
239 | 1	snp239	0	138989	A	G
240 | 1	snp240	0	139393	A	C
241 | 1	snp241	0	139832	C	T
242 | 1	snp242	0	139991	G	T
243 | 1	snp243	0	140005	G	C
244 | 1	snp244	0	140671	T	C
245 | 1	snp245	0	141411	G	T
246 | 1	snp246	0	141556	A	T
247 | 1	snp247	0	142940	G	T
248 | 1	snp248	0	143199	G	C
249 | 1	snp249	0	143272	A	T
250 | 1	snp250	0	143471	A	C
251 | 1	snp251	0	143509	C	G
252 | 1	snp252	0	143704	C	G
253 | 1	snp253	0	144331	A	C
254 | 1	snp254	0	144392	T	G
255 | 1	snp255	0	144410	G	T
256 | 1	snp256	0	144832	A	G
257 | 1	snp257	0	144872	A	G
258 | 1	snp258	0	145457	A	C
259 | 1	snp259	0	146220	G	T
260 | 1	snp260	0	147220	G	T
261 | 1	snp261	0	147236	G	T
262 | 1	snp262	0	148601	C	G
263 | 1	snp263	0	148990	T	A
264 | 1	snp264	0	150430	A	T
265 | 1	snp265	0	150457	C	A
266 | 1	snp266	0	150992	C	T
267 | 1	snp267	0	151942	C	A
268 | 1	snp268	0	153104	G	T
269 | 1	snp269	0	153232	T	A
270 | 1	snp270	0	155442	G	T
271 | 1	snp271	0	155984	A	C
272 | 1	snp272	0	156619	T	C
273 | 1	snp273	0	157491	C	T
274 | 1	snp274	0	157974	T	C
275 | 1	snp275	0	158134	A	G
276 | 1	snp276	0	158305	A	G
277 | 1	snp277	0	158640	A	G
278 | 1	snp278	0	158910	G	C
279 | 1	snp279	0	163082	G	A
280 | 1	snp280	0	163492	A	G
281 | 1	snp281	0	163569	C	A
282 | 1	snp282	0	163766	T	C
283 | 1	snp283	0	164085	T	C
284 | 1	snp284	0	164310	C	G
285 | 1	snp285	0	164375	A	T
286 | 1	snp286	0	164393	G	C
287 | 1	snp287	0	164553	A	G
288 | 1	snp288	0	164679	C	T
289 | 1	snp289	0	164815	A	T
290 | 1	snp290	0	164969	G	T
291 | 1	snp291	0	165386	G	C
292 | 1	snp292	0	165397	A	G
293 | 1	snp293	0	165625	T	C
294 | 1	snp294	0	165766	C	T
295 | 1	snp295	0	165984	C	T
296 | 1	snp296	0	166030	C	G
297 | 1	snp297	0	166386	A	G
298 | 1	snp298	0	166850	G	T
299 | 1	snp299	0	167493	A	G
300 | 1	snp300	0	167692	G	T
301 | 


--------------------------------------------------------------------------------
/inst/extdata/chr1.fam:
--------------------------------------------------------------------------------
  1 | 5837 5837 0 0 0 -9
  2 | 6008 6008 0 0 0 -9
  3 | 6009 6009 0 0 0 -9
  4 | 6016 6016 0 0 0 -9
  5 | 6040 6040 0 0 0 -9
  6 | 6042 6042 0 0 0 -9
  7 | 6043 6043 0 0 0 -9
  8 | 6046 6046 0 0 0 -9
  9 | 6064 6064 0 0 0 -9
 10 | 6074 6074 0 0 0 -9
 11 | 6243 6243 0 0 0 -9
 12 | 6709 6709 0 0 0 -9
 13 | 6897 6897 0 0 0 -9
 14 | 6898 6898 0 0 0 -9
 15 | 6899 6899 0 0 0 -9
 16 | 6900 6900 0 0 0 -9
 17 | 6901 6901 0 0 0 -9
 18 | 6903 6903 0 0 0 -9
 19 | 6904 6904 0 0 0 -9
 20 | 6905 6905 0 0 0 -9
 21 | 6906 6906 0 0 0 -9
 22 | 6907 6907 0 0 0 -9
 23 | 6908 6908 0 0 0 -9
 24 | 6909 6909 0 0 0 -9
 25 | 6910 6910 0 0 0 -9
 26 | 6911 6911 0 0 0 -9
 27 | 6913 6913 0 0 0 -9
 28 | 6914 6914 0 0 0 -9
 29 | 6915 6915 0 0 0 -9
 30 | 6916 6916 0 0 0 -9
 31 | 6917 6917 0 0 0 -9
 32 | 6918 6918 0 0 0 -9
 33 | 6919 6919 0 0 0 -9
 34 | 6920 6920 0 0 0 -9
 35 | 6921 6921 0 0 0 -9
 36 | 6922 6922 0 0 0 -9
 37 | 6923 6923 0 0 0 -9
 38 | 6924 6924 0 0 0 -9
 39 | 6926 6926 0 0 0 -9
 40 | 6927 6927 0 0 0 -9
 41 | 6928 6928 0 0 0 -9
 42 | 6929 6929 0 0 0 -9
 43 | 6930 6930 0 0 0 -9
 44 | 6931 6931 0 0 0 -9
 45 | 6932 6932 0 0 0 -9
 46 | 6933 6933 0 0 0 -9
 47 | 6936 6936 0 0 0 -9
 48 | 6937 6937 0 0 0 -9
 49 | 6939 6939 0 0 0 -9
 50 | 6940 6940 0 0 0 -9
 51 | 6942 6942 0 0 0 -9
 52 | 6943 6943 0 0 0 -9
 53 | 6944 6944 0 0 0 -9
 54 | 6945 6945 0 0 0 -9
 55 | 6946 6946 0 0 0 -9
 56 | 6951 6951 0 0 0 -9
 57 | 6956 6956 0 0 0 -9
 58 | 6957 6957 0 0 0 -9
 59 | 6958 6958 0 0 0 -9
 60 | 6959 6959 0 0 0 -9
 61 | 6960 6960 0 0 0 -9
 62 | 6961 6961 0 0 0 -9
 63 | 6962 6962 0 0 0 -9
 64 | 6963 6963 0 0 0 -9
 65 | 6964 6964 0 0 0 -9
 66 | 6965 6965 0 0 0 -9
 67 | 6966 6966 0 0 0 -9
 68 | 6967 6967 0 0 0 -9
 69 | 6968 6968 0 0 0 -9
 70 | 6969 6969 0 0 0 -9
 71 | 6970 6970 0 0 0 -9
 72 | 6971 6971 0 0 0 -9
 73 | 6972 6972 0 0 0 -9
 74 | 6973 6973 0 0 0 -9
 75 | 6974 6974 0 0 0 -9
 76 | 6975 6975 0 0 0 -9
 77 | 6976 6976 0 0 0 -9
 78 | 6977 6977 0 0 0 -9
 79 | 6978 6978 0 0 0 -9
 80 | 6979 6979 0 0 0 -9
 81 | 6980 6980 0 0 0 -9
 82 | 6981 6981 0 0 0 -9
 83 | 6982 6982 0 0 0 -9
 84 | 6983 6983 0 0 0 -9
 85 | 6984 6984 0 0 0 -9
 86 | 6985 6985 0 0 0 -9
 87 | 6988 6988 0 0 0 -9
 88 | 7000 7000 0 0 0 -9
 89 | 7014 7014 0 0 0 -9
 90 | 7033 7033 0 0 0 -9
 91 | 7062 7062 0 0 0 -9
 92 | 7064 7064 0 0 0 -9
 93 | 7081 7081 0 0 0 -9
 94 | 7094 7094 0 0 0 -9
 95 | 7123 7123 0 0 0 -9
 96 | 7147 7147 0 0 0 -9
 97 | 7163 7163 0 0 0 -9
 98 | 7231 7231 0 0 0 -9
 99 | 7255 7255 0 0 0 -9
100 | 7275 7275 0 0 0 -9
101 | 7282 7282 0 0 0 -9
102 | 7296 7296 0 0 0 -9
103 | 7306 7306 0 0 0 -9
104 | 7323 7323 0 0 0 -9
105 | 7346 7346 0 0 0 -9
106 | 7418 7418 0 0 0 -9
107 | 7424 7424 0 0 0 -9
108 | 7438 7438 0 0 0 -9
109 | 7460 7460 0 0 0 -9
110 | 7461 7461 0 0 0 -9
111 | 7477 7477 0 0 0 -9
112 | 7514 7514 0 0 0 -9
113 | 7515 7515 0 0 0 -9
114 | 7516 7516 0 0 0 -9
115 | 7517 7517 0 0 0 -9
116 | 7518 7518 0 0 0 -9
117 | 7519 7519 0 0 0 -9
118 | 7520 7520 0 0 0 -9
119 | 7521 7521 0 0 0 -9
120 | 7522 7522 0 0 0 -9
121 | 7523 7523 0 0 0 -9
122 | 7524 7524 0 0 0 -9
123 | 7525 7525 0 0 0 -9
124 | 7526 7526 0 0 0 -9
125 | 8213 8213 0 0 0 -9
126 | 8214 8214 0 0 0 -9
127 | 8215 8215 0 0 0 -9
128 | 8222 8222 0 0 0 -9
129 | 8230 8230 0 0 0 -9
130 | 8231 8231 0 0 0 -9
131 | 8233 8233 0 0 0 -9
132 | 8235 8235 0 0 0 -9
133 | 8236 8236 0 0 0 -9
134 | 8237 8237 0 0 0 -9
135 | 8239 8239 0 0 0 -9
136 | 8240 8240 0 0 0 -9
137 | 8241 8241 0 0 0 -9
138 | 8242 8242 0 0 0 -9
139 | 8243 8243 0 0 0 -9
140 | 8245 8245 0 0 0 -9
141 | 8247 8247 0 0 0 -9
142 | 8249 8249 0 0 0 -9
143 | 8254 8254 0 0 0 -9
144 | 8256 8256 0 0 0 -9
145 | 8258 8258 0 0 0 -9
146 | 8259 8259 0 0 0 -9
147 | 8264 8264 0 0 0 -9
148 | 8265 8265 0 0 0 -9
149 | 8266 8266 0 0 0 -9
150 | 8270 8270 0 0 0 -9
151 | 8271 8271 0 0 0 -9
152 | 8274 8274 0 0 0 -9
153 | 8275 8275 0 0 0 -9
154 | 8283 8283 0 0 0 -9
155 | 8284 8284 0 0 0 -9
156 | 8285 8285 0 0 0 -9
157 | 8290 8290 0 0 0 -9
158 | 8296 8296 0 0 0 -9
159 | 8297 8297 0 0 0 -9
160 | 8300 8300 0 0 0 -9
161 | 8306 8306 0 0 0 -9
162 | 8310 8310 0 0 0 -9
163 | 8311 8311 0 0 0 -9
164 | 8312 8312 0 0 0 -9
165 | 8313 8313 0 0 0 -9
166 | 8314 8314 0 0 0 -9
167 | 8323 8323 0 0 0 -9
168 | 8325 8325 0 0 0 -9
169 | 8326 8326 0 0 0 -9
170 | 8329 8329 0 0 0 -9
171 | 8334 8334 0 0 0 -9
172 | 8335 8335 0 0 0 -9
173 | 8337 8337 0 0 0 -9
174 | 8343 8343 0 0 0 -9
175 | 8351 8351 0 0 0 -9
176 | 8353 8353 0 0 0 -9
177 | 8354 8354 0 0 0 -9
178 | 8357 8357 0 0 0 -9
179 | 8365 8365 0 0 0 -9
180 | 8366 8366 0 0 0 -9
181 | 8369 8369 0 0 0 -9
182 | 8374 8374 0 0 0 -9
183 | 8376 8376 0 0 0 -9
184 | 8378 8378 0 0 0 -9
185 | 8387 8387 0 0 0 -9
186 | 8388 8388 0 0 0 -9
187 | 8389 8389 0 0 0 -9
188 | 8395 8395 0 0 0 -9
189 | 8411 8411 0 0 0 -9
190 | 8412 8412 0 0 0 -9
191 | 8420 8420 0 0 0 -9
192 | 8422 8422 0 0 0 -9
193 | 8423 8423 0 0 0 -9
194 | 8424 8424 0 0 0 -9
195 | 8426 8426 0 0 0 -9
196 | 8430 8430 0 0 0 -9
197 | 9057 9057 0 0 0 -9
198 | 9058 9058 0 0 0 -9
199 | 100000 100000 0 0 0 -9
200 | 


--------------------------------------------------------------------------------
/inst/extdata/chr2.bed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantGen/BGData/fe7a57779d903f7056d5841482c9afdbeae5744b/inst/extdata/chr2.bed


--------------------------------------------------------------------------------
/inst/extdata/chr2.bim:
--------------------------------------------------------------------------------
  1 | 2	snp52382	0	2651	T	C
  2 | 2	snp52383	0	2961	C	T
  3 | 2	snp52384	0	10035	T	G
  4 | 2	snp52385	0	29477	C	T
  5 | 2	snp52386	0	30398	A	G
  6 | 2	snp52387	0	30725	G	C
  7 | 2	snp52388	0	30947	A	G
  8 | 2	snp52389	0	31142	T	C
  9 | 2	snp52390	0	31187	A	G
 10 | 2	snp52391	0	36510	C	T
 11 | 2	snp52392	0	36822	G	T
 12 | 2	snp52393	0	37182	T	C
 13 | 2	snp52394	0	37890	A	G
 14 | 2	snp52395	0	40795	C	T
 15 | 2	snp52396	0	57230	G	A
 16 | 2	snp52397	0	66351	T	C
 17 | 2	snp52398	0	68457	G	C
 18 | 2	snp52399	0	69008	T	A
 19 | 2	snp52400	0	69432	T	C
 20 | 2	snp52401	0	69598	T	G
 21 | 2	snp52402	0	70249	G	A
 22 | 2	snp52403	0	71642	G	T
 23 | 2	snp52404	0	72058	G	T
 24 | 2	snp52405	0	72539	T	G
 25 | 2	snp52406	0	73074	C	T
 26 | 2	snp52407	0	73116	G	T
 27 | 2	snp52408	0	74088	T	C
 28 | 2	snp52409	0	74635	T	C
 29 | 2	snp52410	0	75978	C	A
 30 | 2	snp52411	0	76134	C	T
 31 | 2	snp52412	0	76310	C	T
 32 | 2	snp52413	0	77634	A	G
 33 | 2	snp52414	0	78295	T	C
 34 | 2	snp52415	0	78565	G	A
 35 | 2	snp52416	0	78958	A	G
 36 | 2	snp52417	0	79128	A	G
 37 | 2	snp52418	0	79708	C	A
 38 | 2	snp52419	0	79962	A	C
 39 | 2	snp52420	0	80137	G	T
 40 | 2	snp52421	0	80228	A	G
 41 | 2	snp52422	0	80356	G	A
 42 | 2	snp52423	0	80495	A	C
 43 | 2	snp52424	0	80723	C	T
 44 | 2	snp52425	0	80789	G	C
 45 | 2	snp52426	0	80818	T	G
 46 | 2	snp52427	0	81127	C	G
 47 | 2	snp52428	0	81342	T	C
 48 | 2	snp52429	0	81563	T	A
 49 | 2	snp52430	0	81613	T	C
 50 | 2	snp52431	0	81750	T	G
 51 | 2	snp52432	0	81886	C	T
 52 | 2	snp52433	0	81914	T	A
 53 | 2	snp52434	0	82340	G	A
 54 | 2	snp52435	0	82580	C	T
 55 | 2	snp52436	0	82809	G	A
 56 | 2	snp52437	0	82934	T	A
 57 | 2	snp52438	0	83103	G	A
 58 | 2	snp52439	0	83234	T	A
 59 | 2	snp52440	0	83262	T	C
 60 | 2	snp52441	0	83533	A	G
 61 | 2	snp52442	0	83554	C	T
 62 | 2	snp52443	0	83795	T	C
 63 | 2	snp52444	0	84000	G	A
 64 | 2	snp52445	0	84037	A	G
 65 | 2	snp52446	0	84615	T	C
 66 | 2	snp52447	0	86107	A	G
 67 | 2	snp52448	0	88461	A	C
 68 | 2	snp52449	0	88866	C	T
 69 | 2	snp52450	0	91203	T	C
 70 | 2	snp52451	0	92516	G	T
 71 | 2	snp52452	0	92756	C	A
 72 | 2	snp52453	0	93291	G	C
 73 | 2	snp52454	0	93777	T	C
 74 | 2	snp52455	0	93818	C	T
 75 | 2	snp52456	0	94509	A	G
 76 | 2	snp52457	0	94621	C	A
 77 | 2	snp52458	0	95458	G	T
 78 | 2	snp52459	0	95862	G	A
 79 | 2	snp52460	0	97587	T	C
 80 | 2	snp52461	0	97966	A	G
 81 | 2	snp52462	0	98024	T	C
 82 | 2	snp52463	0	98731	T	C
 83 | 2	snp52464	0	98752	A	G
 84 | 2	snp52465	0	99935	T	C
 85 | 2	snp52466	0	100206	T	C
 86 | 2	snp52467	0	100348	G	C
 87 | 2	snp52468	0	100542	C	A
 88 | 2	snp52469	0	101022	C	T
 89 | 2	snp52470	0	102165	T	G
 90 | 2	snp52471	0	102672	A	C
 91 | 2	snp52472	0	102919	T	G
 92 | 2	snp52473	0	104217	A	G
 93 | 2	snp52474	0	104434	T	C
 94 | 2	snp52475	0	105664	T	C
 95 | 2	snp52476	0	106117	C	A
 96 | 2	snp52477	0	107505	C	A
 97 | 2	snp52478	0	108183	A	C
 98 | 2	snp52479	0	108597	A	G
 99 | 2	snp52480	0	108880	A	G
100 | 2	snp52481	0	108975	C	G
101 | 2	snp52482	0	110845	C	T
102 | 2	snp52483	0	111267	T	C
103 | 2	snp52484	0	112063	A	T
104 | 2	snp52485	0	114175	A	G
105 | 2	snp52486	0	114479	C	T
106 | 2	snp52487	0	114506	G	C
107 | 2	snp52488	0	115564	T	C
108 | 2	snp52489	0	116876	G	C
109 | 2	snp52490	0	118895	C	T
110 | 2	snp52491	0	119449	C	T
111 | 2	snp52492	0	119519	A	C
112 | 2	snp52493	0	120486	T	G
113 | 2	snp52494	0	120672	T	C
114 | 2	snp52495	0	120948	C	T
115 | 2	snp52496	0	121071	G	T
116 | 2	snp52497	0	121404	G	A
117 | 2	snp52498	0	121590	A	T
118 | 2	snp52499	0	123187	G	A
119 | 2	snp52500	0	123262	T	C
120 | 2	snp52501	0	123532	A	G
121 | 2	snp52502	0	123653	C	T
122 | 2	snp52503	0	124927	T	C
123 | 2	snp52504	0	126555	C	A
124 | 2	snp52505	0	128418	C	G
125 | 2	snp52506	0	128543	G	C
126 | 2	snp52507	0	128964	A	G
127 | 2	snp52508	0	129109	G	A
128 | 2	snp52509	0	129268	G	A
129 | 2	snp52510	0	129448	T	A
130 | 2	snp52511	0	131134	A	G
131 | 2	snp52512	0	134343	C	A
132 | 2	snp52513	0	134490	C	A
133 | 2	snp52514	0	137159	A	G
134 | 2	snp52515	0	137260	G	A
135 | 2	snp52516	0	137433	A	G
136 | 2	snp52517	0	138851	A	G
137 | 2	snp52518	0	139479	C	G
138 | 2	snp52519	0	142739	A	G
139 | 2	snp52520	0	143193	T	G
140 | 2	snp52521	0	144173	C	T
141 | 2	snp52522	0	144219	T	A
142 | 2	snp52523	0	144377	T	A
143 | 2	snp52524	0	144953	T	G
144 | 2	snp52525	0	145185	G	T
145 | 2	snp52526	0	145226	G	C
146 | 2	snp52527	0	145550	G	T
147 | 2	snp52528	0	146013	A	G
148 | 2	snp52529	0	146066	G	C
149 | 2	snp52530	0	146345	G	A
150 | 2	snp52531	0	146396	T	C
151 | 2	snp52532	0	146895	A	G
152 | 2	snp52533	0	147035	A	G
153 | 2	snp52534	0	147264	G	A
154 | 2	snp52535	0	147837	C	A
155 | 2	snp52536	0	148060	T	C
156 | 2	snp52537	0	148353	C	T
157 | 2	snp52538	0	149064	A	T
158 | 2	snp52539	0	149254	G	C
159 | 2	snp52540	0	149398	A	C
160 | 2	snp52541	0	149692	T	G
161 | 2	snp52542	0	150627	T	C
162 | 2	snp52543	0	151189	A	G
163 | 2	snp52544	0	151273	T	G
164 | 2	snp52545	0	151343	T	A
165 | 2	snp52546	0	151415	A	G
166 | 2	snp52547	0	151709	T	G
167 | 2	snp52548	0	152431	T	C
168 | 2	snp52549	0	152591	G	A
169 | 2	snp52550	0	153545	C	T
170 | 2	snp52551	0	153631	G	A
171 | 2	snp52552	0	154223	A	G
172 | 2	snp52553	0	154749	C	T
173 | 2	snp52554	0	154775	T	C
174 | 2	snp52555	0	154824	T	A
175 | 2	snp52556	0	154920	T	C
176 | 2	snp52557	0	154938	C	T
177 | 2	snp52558	0	155224	T	G
178 | 2	snp52559	0	155258	A	G
179 | 2	snp52560	0	155327	T	C
180 | 2	snp52561	0	156174	G	A
181 | 2	snp52562	0	156225	A	G
182 | 2	snp52563	0	156448	C	T
183 | 2	snp52564	0	156860	C	G
184 | 2	snp52565	0	157053	T	G
185 | 2	snp52566	0	157749	C	G
186 | 2	snp52567	0	158102	A	C
187 | 2	snp52568	0	158765	G	T
188 | 2	snp52569	0	158979	G	A
189 | 2	snp52570	0	159184	G	T
190 | 2	snp52571	0	159209	A	G
191 | 2	snp52572	0	159309	T	C
192 | 2	snp52573	0	159818	A	C
193 | 2	snp52574	0	159872	C	A
194 | 2	snp52575	0	160448	C	T
195 | 2	snp52576	0	160925	A	T
196 | 2	snp52577	0	161051	C	T
197 | 2	snp52578	0	161382	T	A
198 | 2	snp52579	0	161598	A	G
199 | 2	snp52580	0	161820	G	A
200 | 2	snp52581	0	162538	C	T
201 | 2	snp52582	0	162725	T	A
202 | 2	snp52583	0	163083	G	C
203 | 2	snp52584	0	163111	C	T
204 | 2	snp52585	0	163497	A	G
205 | 2	snp52586	0	164150	C	T
206 | 2	snp52587	0	164375	G	A
207 | 2	snp52588	0	165036	C	G
208 | 2	snp52589	0	166310	A	C
209 | 2	snp52590	0	166668	T	C
210 | 2	snp52591	0	167441	G	C
211 | 2	snp52592	0	167647	A	C
212 | 2	snp52593	0	167716	T	C
213 | 2	snp52594	0	168816	A	T
214 | 2	snp52595	0	169729	A	G
215 | 2	snp52596	0	169989	T	C
216 | 2	snp52597	0	170202	C	T
217 | 2	snp52598	0	170351	C	T
218 | 2	snp52599	0	172244	A	G
219 | 2	snp52600	0	172482	A	T
220 | 2	snp52601	0	172548	T	A
221 | 2	snp52602	0	172584	T	C
222 | 2	snp52603	0	172629	C	T
223 | 2	snp52604	0	172813	T	G
224 | 2	snp52605	0	172931	C	A
225 | 2	snp52606	0	172980	G	A
226 | 2	snp52607	0	173320	C	A
227 | 2	snp52608	0	173442	G	A
228 | 2	snp52609	0	173651	G	A
229 | 2	snp52610	0	173823	C	A
230 | 2	snp52611	0	173858	T	G
231 | 2	snp52612	0	174115	G	C
232 | 2	snp52613	0	174364	A	T
233 | 2	snp52614	0	174885	G	A
234 | 2	snp52615	0	174960	G	A
235 | 2	snp52616	0	175289	A	G
236 | 2	snp52617	0	175529	T	C
237 | 2	snp52618	0	175874	G	T
238 | 2	snp52619	0	176156	A	T
239 | 2	snp52620	0	176616	T	C
240 | 2	snp52621	0	176666	C	T
241 | 2	snp52622	0	178596	A	G
242 | 2	snp52623	0	178849	A	T
243 | 2	snp52624	0	180461	A	T
244 | 2	snp52625	0	180480	G	A
245 | 2	snp52626	0	180942	C	A
246 | 2	snp52627	0	181030	G	T
247 | 2	snp52628	0	182538	T	G
248 | 2	snp52629	0	182818	G	A
249 | 2	snp52630	0	182841	A	G
250 | 2	snp52631	0	184364	C	T
251 | 2	snp52632	0	184749	G	C
252 | 2	snp52633	0	185018	A	G
253 | 2	snp52634	0	186236	T	G
254 | 2	snp52635	0	186947	C	T
255 | 2	snp52636	0	187048	A	T
256 | 2	snp52637	0	187084	C	T
257 | 2	snp52638	0	187383	T	G
258 | 2	snp52639	0	187747	C	T
259 | 2	snp52640	0	189659	G	A
260 | 2	snp52641	0	189683	C	T
261 | 2	snp52642	0	190634	G	A
262 | 2	snp52643	0	191136	C	G
263 | 2	snp52644	0	191181	G	A
264 | 2	snp52645	0	191716	A	G
265 | 2	snp52646	0	191913	T	G
266 | 2	snp52647	0	192676	T	C
267 | 2	snp52648	0	193364	A	G
268 | 2	snp52649	0	194546	A	G
269 | 2	snp52650	0	198408	A	G
270 | 2	snp52651	0	199672	C	T
271 | 2	snp52652	0	202467	G	A
272 | 2	snp52653	0	203117	A	G
273 | 2	snp52654	0	203192	A	G
274 | 2	snp52655	0	203286	G	A
275 | 2	snp52656	0	204027	T	G
276 | 2	snp52657	0	204325	A	C
277 | 2	snp52658	0	204522	A	C
278 | 2	snp52659	0	204596	T	C
279 | 2	snp52660	0	204935	C	A
280 | 2	snp52661	0	205036	G	C
281 | 2	snp52662	0	205338	T	C
282 | 2	snp52663	0	205446	G	A
283 | 2	snp52664	0	205489	C	T
284 | 2	snp52665	0	206117	C	G
285 | 2	snp52666	0	207344	G	C
286 | 2	snp52667	0	207433	A	G
287 | 2	snp52668	0	210149	G	C
288 | 2	snp52669	0	210357	A	T
289 | 2	snp52670	0	212442	T	C
290 | 2	snp52671	0	212787	C	A
291 | 2	snp52672	0	212833	C	A
292 | 2	snp52673	0	214360	A	G
293 | 2	snp52674	0	214861	G	C
294 | 2	snp52675	0	215794	A	G
295 | 2	snp52676	0	215873	T	C
296 | 2	snp52677	0	219015	G	C
297 | 2	snp52678	0	220105	T	C
298 | 2	snp52679	0	220163	T	G
299 | 2	snp52680	0	220283	T	C
300 | 2	snp52681	0	220523	G	A
301 | 


--------------------------------------------------------------------------------
/inst/extdata/chr2.fam:
--------------------------------------------------------------------------------
  1 | 5837 5837 0 0 0 -9
  2 | 6008 6008 0 0 0 -9
  3 | 6009 6009 0 0 0 -9
  4 | 6016 6016 0 0 0 -9
  5 | 6040 6040 0 0 0 -9
  6 | 6042 6042 0 0 0 -9
  7 | 6043 6043 0 0 0 -9
  8 | 6046 6046 0 0 0 -9
  9 | 6064 6064 0 0 0 -9
 10 | 6074 6074 0 0 0 -9
 11 | 6243 6243 0 0 0 -9
 12 | 6709 6709 0 0 0 -9
 13 | 6897 6897 0 0 0 -9
 14 | 6898 6898 0 0 0 -9
 15 | 6899 6899 0 0 0 -9
 16 | 6900 6900 0 0 0 -9
 17 | 6901 6901 0 0 0 -9
 18 | 6903 6903 0 0 0 -9
 19 | 6904 6904 0 0 0 -9
 20 | 6905 6905 0 0 0 -9
 21 | 6906 6906 0 0 0 -9
 22 | 6907 6907 0 0 0 -9
 23 | 6908 6908 0 0 0 -9
 24 | 6909 6909 0 0 0 -9
 25 | 6910 6910 0 0 0 -9
 26 | 6911 6911 0 0 0 -9
 27 | 6913 6913 0 0 0 -9
 28 | 6914 6914 0 0 0 -9
 29 | 6915 6915 0 0 0 -9
 30 | 6916 6916 0 0 0 -9
 31 | 6917 6917 0 0 0 -9
 32 | 6918 6918 0 0 0 -9
 33 | 6919 6919 0 0 0 -9
 34 | 6920 6920 0 0 0 -9
 35 | 6921 6921 0 0 0 -9
 36 | 6922 6922 0 0 0 -9
 37 | 6923 6923 0 0 0 -9
 38 | 6924 6924 0 0 0 -9
 39 | 6926 6926 0 0 0 -9
 40 | 6927 6927 0 0 0 -9
 41 | 6928 6928 0 0 0 -9
 42 | 6929 6929 0 0 0 -9
 43 | 6930 6930 0 0 0 -9
 44 | 6931 6931 0 0 0 -9
 45 | 6932 6932 0 0 0 -9
 46 | 6933 6933 0 0 0 -9
 47 | 6936 6936 0 0 0 -9
 48 | 6937 6937 0 0 0 -9
 49 | 6939 6939 0 0 0 -9
 50 | 6940 6940 0 0 0 -9
 51 | 6942 6942 0 0 0 -9
 52 | 6943 6943 0 0 0 -9
 53 | 6944 6944 0 0 0 -9
 54 | 6945 6945 0 0 0 -9
 55 | 6946 6946 0 0 0 -9
 56 | 6951 6951 0 0 0 -9
 57 | 6956 6956 0 0 0 -9
 58 | 6957 6957 0 0 0 -9
 59 | 6958 6958 0 0 0 -9
 60 | 6959 6959 0 0 0 -9
 61 | 6960 6960 0 0 0 -9
 62 | 6961 6961 0 0 0 -9
 63 | 6962 6962 0 0 0 -9
 64 | 6963 6963 0 0 0 -9
 65 | 6964 6964 0 0 0 -9
 66 | 6965 6965 0 0 0 -9
 67 | 6966 6966 0 0 0 -9
 68 | 6967 6967 0 0 0 -9
 69 | 6968 6968 0 0 0 -9
 70 | 6969 6969 0 0 0 -9
 71 | 6970 6970 0 0 0 -9
 72 | 6971 6971 0 0 0 -9
 73 | 6972 6972 0 0 0 -9
 74 | 6973 6973 0 0 0 -9
 75 | 6974 6974 0 0 0 -9
 76 | 6975 6975 0 0 0 -9
 77 | 6976 6976 0 0 0 -9
 78 | 6977 6977 0 0 0 -9
 79 | 6978 6978 0 0 0 -9
 80 | 6979 6979 0 0 0 -9
 81 | 6980 6980 0 0 0 -9
 82 | 6981 6981 0 0 0 -9
 83 | 6982 6982 0 0 0 -9
 84 | 6983 6983 0 0 0 -9
 85 | 6984 6984 0 0 0 -9
 86 | 6985 6985 0 0 0 -9
 87 | 6988 6988 0 0 0 -9
 88 | 7000 7000 0 0 0 -9
 89 | 7014 7014 0 0 0 -9
 90 | 7033 7033 0 0 0 -9
 91 | 7062 7062 0 0 0 -9
 92 | 7064 7064 0 0 0 -9
 93 | 7081 7081 0 0 0 -9
 94 | 7094 7094 0 0 0 -9
 95 | 7123 7123 0 0 0 -9
 96 | 7147 7147 0 0 0 -9
 97 | 7163 7163 0 0 0 -9
 98 | 7231 7231 0 0 0 -9
 99 | 7255 7255 0 0 0 -9
100 | 7275 7275 0 0 0 -9
101 | 7282 7282 0 0 0 -9
102 | 7296 7296 0 0 0 -9
103 | 7306 7306 0 0 0 -9
104 | 7323 7323 0 0 0 -9
105 | 7346 7346 0 0 0 -9
106 | 7418 7418 0 0 0 -9
107 | 7424 7424 0 0 0 -9
108 | 7438 7438 0 0 0 -9
109 | 7460 7460 0 0 0 -9
110 | 7461 7461 0 0 0 -9
111 | 7477 7477 0 0 0 -9
112 | 7514 7514 0 0 0 -9
113 | 7515 7515 0 0 0 -9
114 | 7516 7516 0 0 0 -9
115 | 7517 7517 0 0 0 -9
116 | 7518 7518 0 0 0 -9
117 | 7519 7519 0 0 0 -9
118 | 7520 7520 0 0 0 -9
119 | 7521 7521 0 0 0 -9
120 | 7522 7522 0 0 0 -9
121 | 7523 7523 0 0 0 -9
122 | 7524 7524 0 0 0 -9
123 | 7525 7525 0 0 0 -9
124 | 7526 7526 0 0 0 -9
125 | 8213 8213 0 0 0 -9
126 | 8214 8214 0 0 0 -9
127 | 8215 8215 0 0 0 -9
128 | 8222 8222 0 0 0 -9
129 | 8230 8230 0 0 0 -9
130 | 8231 8231 0 0 0 -9
131 | 8233 8233 0 0 0 -9
132 | 8235 8235 0 0 0 -9
133 | 8236 8236 0 0 0 -9
134 | 8237 8237 0 0 0 -9
135 | 8239 8239 0 0 0 -9
136 | 8240 8240 0 0 0 -9
137 | 8241 8241 0 0 0 -9
138 | 8242 8242 0 0 0 -9
139 | 8243 8243 0 0 0 -9
140 | 8245 8245 0 0 0 -9
141 | 8247 8247 0 0 0 -9
142 | 8249 8249 0 0 0 -9
143 | 8254 8254 0 0 0 -9
144 | 8256 8256 0 0 0 -9
145 | 8258 8258 0 0 0 -9
146 | 8259 8259 0 0 0 -9
147 | 8264 8264 0 0 0 -9
148 | 8265 8265 0 0 0 -9
149 | 8266 8266 0 0 0 -9
150 | 8270 8270 0 0 0 -9
151 | 8271 8271 0 0 0 -9
152 | 8274 8274 0 0 0 -9
153 | 8275 8275 0 0 0 -9
154 | 8283 8283 0 0 0 -9
155 | 8284 8284 0 0 0 -9
156 | 8285 8285 0 0 0 -9
157 | 8290 8290 0 0 0 -9
158 | 8296 8296 0 0 0 -9
159 | 8297 8297 0 0 0 -9
160 | 8300 8300 0 0 0 -9
161 | 8306 8306 0 0 0 -9
162 | 8310 8310 0 0 0 -9
163 | 8311 8311 0 0 0 -9
164 | 8312 8312 0 0 0 -9
165 | 8313 8313 0 0 0 -9
166 | 8314 8314 0 0 0 -9
167 | 8323 8323 0 0 0 -9
168 | 8325 8325 0 0 0 -9
169 | 8326 8326 0 0 0 -9
170 | 8329 8329 0 0 0 -9
171 | 8334 8334 0 0 0 -9
172 | 8335 8335 0 0 0 -9
173 | 8337 8337 0 0 0 -9
174 | 8343 8343 0 0 0 -9
175 | 8351 8351 0 0 0 -9
176 | 8353 8353 0 0 0 -9
177 | 8354 8354 0 0 0 -9
178 | 8357 8357 0 0 0 -9
179 | 8365 8365 0 0 0 -9
180 | 8366 8366 0 0 0 -9
181 | 8369 8369 0 0 0 -9
182 | 8374 8374 0 0 0 -9
183 | 8376 8376 0 0 0 -9
184 | 8378 8378 0 0 0 -9
185 | 8387 8387 0 0 0 -9
186 | 8388 8388 0 0 0 -9
187 | 8389 8389 0 0 0 -9
188 | 8395 8395 0 0 0 -9
189 | 8411 8411 0 0 0 -9
190 | 8412 8412 0 0 0 -9
191 | 8420 8420 0 0 0 -9
192 | 8422 8422 0 0 0 -9
193 | 8423 8423 0 0 0 -9
194 | 8424 8424 0 0 0 -9
195 | 8426 8426 0 0 0 -9
196 | 8430 8430 0 0 0 -9
197 | 9057 9057 0 0 0 -9
198 | 9058 9058 0 0 0 -9
199 | 100000 100000 0 0 0 -9
200 | 


--------------------------------------------------------------------------------
/inst/extdata/chr3.bed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/QuantGen/BGData/fe7a57779d903f7056d5841482c9afdbeae5744b/inst/extdata/chr3.bed


--------------------------------------------------------------------------------
/inst/extdata/chr3.bim:
--------------------------------------------------------------------------------
  1 | 3	snp81183	0	1394	T	C
  2 | 3	snp81184	0	4681	T	G
  3 | 3	snp81185	0	5444	A	C
  4 | 3	snp81186	0	5826	A	T
  5 | 3	snp81187	0	9268	A	G
  6 | 3	snp81188	0	9374	T	C
  7 | 3	snp81189	0	10352	T	G
  8 | 3	snp81190	0	14199	G	C
  9 | 3	snp81191	0	14851	A	T
 10 | 3	snp81192	0	16517	C	T
 11 | 3	snp81193	0	16871	T	C
 12 | 3	snp81194	0	22781	T	A
 13 | 3	snp81195	0	26145	A	G
 14 | 3	snp81196	0	26826	A	C
 15 | 3	snp81197	0	30382	A	G
 16 | 3	snp81198	0	30509	G	T
 17 | 3	snp81199	0	32608	T	C
 18 | 3	snp81200	0	34164	T	G
 19 | 3	snp81201	0	38218	A	C
 20 | 3	snp81202	0	38447	A	G
 21 | 3	snp81203	0	41804	A	C
 22 | 3	snp81204	0	44099	A	C
 23 | 3	snp81205	0	44736	T	G
 24 | 3	snp81206	0	45059	G	T
 25 | 3	snp81207	0	45416	G	A
 26 | 3	snp81208	0	46607	T	A
 27 | 3	snp81209	0	46839	C	T
 28 | 3	snp81210	0	46968	A	C
 29 | 3	snp81211	0	46998	G	A
 30 | 3	snp81212	0	47215	C	G
 31 | 3	snp81213	0	47535	C	G
 32 | 3	snp81214	0	49398	G	A
 33 | 3	snp81215	0	49571	A	C
 34 | 3	snp81216	0	51068	C	T
 35 | 3	snp81217	0	53767	T	C
 36 | 3	snp81218	0	54885	C	T
 37 | 3	snp81219	0	55635	A	T
 38 | 3	snp81220	0	56047	G	A
 39 | 3	snp81221	0	56555	C	G
 40 | 3	snp81222	0	56807	C	T
 41 | 3	snp81223	0	57407	A	G
 42 | 3	snp81224	0	57780	C	T
 43 | 3	snp81225	0	58114	T	C
 44 | 3	snp81226	0	58346	G	T
 45 | 3	snp81227	0	59032	A	G
 46 | 3	snp81228	0	59497	T	G
 47 | 3	snp81229	0	61218	C	T
 48 | 3	snp81230	0	61477	G	A
 49 | 3	snp81231	0	61772	A	T
 50 | 3	snp81232	0	62096	C	T
 51 | 3	snp81233	0	62216	C	G
 52 | 3	snp81234	0	63393	C	G
 53 | 3	snp81235	0	63580	T	C
 54 | 3	snp81236	0	64824	T	A
 55 | 3	snp81237	0	65817	C	A
 56 | 3	snp81238	0	65996	C	G
 57 | 3	snp81239	0	66603	C	G
 58 | 3	snp81240	0	69268	G	C
 59 | 3	snp81241	0	70128	C	T
 60 | 3	snp81242	0	70545	G	A
 61 | 3	snp81243	0	71143	A	C
 62 | 3	snp81244	0	72096	T	C
 63 | 3	snp81245	0	73628	T	C
 64 | 3	snp81246	0	74169	C	A
 65 | 3	snp81247	0	74405	T	A
 66 | 3	snp81248	0	77175	C	T
 67 | 3	snp81249	0	77505	C	T
 68 | 3	snp81250	0	77597	C	T
 69 | 3	snp81251	0	78632	C	T
 70 | 3	snp81252	0	78916	G	A
 71 | 3	snp81253	0	79525	A	G
 72 | 3	snp81254	0	79573	G	C
 73 | 3	snp81255	0	80528	A	T
 74 | 3	snp81256	0	80622	A	G
 75 | 3	snp81257	0	80699	A	G
 76 | 3	snp81258	0	80725	A	T
 77 | 3	snp81259	0	80937	G	T
 78 | 3	snp81260	0	81124	C	G
 79 | 3	snp81261	0	81142	G	A
 80 | 3	snp81262	0	81258	G	A
 81 | 3	snp81263	0	81306	T	C
 82 | 3	snp81264	0	81339	G	C
 83 | 3	snp81265	0	81894	T	C
 84 | 3	snp81266	0	82272	A	C
 85 | 3	snp81267	0	82777	A	G
 86 | 3	snp81268	0	82916	T	C
 87 | 3	snp81269	0	83001	T	C
 88 | 3	snp81270	0	83198	G	T
 89 | 3	snp81271	0	83538	T	G
 90 | 3	snp81272	0	85135	C	T
 91 | 3	snp81273	0	85187	G	A
 92 | 3	snp81274	0	85381	A	G
 93 | 3	snp81275	0	88508	T	G
 94 | 3	snp81276	0	90774	A	T
 95 | 3	snp81277	0	90815	A	C
 96 | 3	snp81278	0	91052	T	G
 97 | 3	snp81279	0	91325	T	A
 98 | 3	snp81280	0	91411	A	C
 99 | 3	snp81281	0	92117	T	A
100 | 3	snp81282	0	92418	C	G
101 | 3	snp81283	0	92744	A	G
102 | 3	snp81284	0	92971	C	G
103 | 3	snp81285	0	94431	A	G
104 | 3	snp81286	0	94834	C	A
105 | 3	snp81287	0	95420	A	G
106 | 3	snp81288	0	95769	A	T
107 | 3	snp81289	0	95948	G	A
108 | 3	snp81290	0	96090	C	A
109 | 3	snp81291	0	96385	T	G
110 | 3	snp81292	0	97418	T	C
111 | 3	snp81293	0	97909	G	A
112 | 3	snp81294	0	98583	G	A
113 | 3	snp81295	0	99152	A	C
114 | 3	snp81296	0	99623	A	C
115 | 3	snp81297	0	99836	C	A
116 | 3	snp81298	0	99921	G	T
117 | 3	snp81299	0	100086	C	A
118 | 3	snp81300	0	100602	G	A
119 | 3	snp81301	0	100840	G	A
120 | 3	snp81302	0	101797	G	A
121 | 3	snp81303	0	102093	T	A
122 | 3	snp81304	0	102255	A	G
123 | 3	snp81305	0	102969	A	C
124 | 3	snp81306	0	103676	T	C
125 | 3	snp81307	0	103720	T	C
126 | 3	snp81308	0	103802	T	C
127 | 3	snp81309	0	103823	C	T
128 | 3	snp81310	0	103965	A	T
129 | 3	snp81311	0	104091	G	A
130 | 3	snp81312	0	104130	C	A
131 | 3	snp81313	0	104166	A	C
132 | 3	snp81314	0	104271	G	C
133 | 3	snp81315	0	104405	C	G
134 | 3	snp81316	0	104421	C	T
135 | 3	snp81317	0	105058	T	C
136 | 3	snp81318	0	105288	C	T
137 | 3	snp81319	0	105819	C	G
138 | 3	snp81320	0	105838	A	G
139 | 3	snp81321	0	106060	G	C
140 | 3	snp81322	0	107792	C	G
141 | 3	snp81323	0	108430	T	C
142 | 3	snp81324	0	110471	G	A
143 | 3	snp81325	0	111647	C	T
144 | 3	snp81326	0	112949	T	C
145 | 3	snp81327	0	113450	T	C
146 | 3	snp81328	0	114059	C	G
147 | 3	snp81329	0	114367	G	C
148 | 3	snp81330	0	114475	G	A
149 | 3	snp81331	0	114753	A	G
150 | 3	snp81332	0	117453	T	A
151 | 3	snp81333	0	117708	A	T
152 | 3	snp81334	0	117981	G	T
153 | 3	snp81335	0	118015	G	A
154 | 3	snp81336	0	119781	T	A
155 | 3	snp81337	0	119974	T	C
156 | 3	snp81338	0	120886	A	G
157 | 3	snp81339	0	121259	T	A
158 | 3	snp81340	0	121396	T	C
159 | 3	snp81341	0	121642	A	T
160 | 3	snp81342	0	122731	C	T
161 | 3	snp81343	0	123281	G	A
162 | 3	snp81344	0	123330	G	C
163 | 3	snp81345	0	123438	C	A
164 | 3	snp81346	0	124382	G	C
165 | 3	snp81347	0	126986	C	G
166 | 3	snp81348	0	127080	T	G
167 | 3	snp81349	0	128122	G	T
168 | 3	snp81350	0	128362	A	T
169 | 3	snp81351	0	128433	C	T
170 | 3	snp81352	0	128538	A	C
171 | 3	snp81353	0	128755	T	C
172 | 3	snp81354	0	128807	A	G
173 | 3	snp81355	0	128886	A	C
174 | 3	snp81356	0	129912	T	C
175 | 3	snp81357	0	130000	T	G
176 | 3	snp81358	0	130145	A	T
177 | 3	snp81359	0	131316	C	A
178 | 3	snp81360	0	132645	G	A
179 | 3	snp81361	0	132898	G	A
180 | 3	snp81362	0	134351	A	G
181 | 3	snp81363	0	136171	A	G
182 | 3	snp81364	0	138201	G	A
183 | 3	snp81365	0	143851	C	T
184 | 3	snp81366	0	145429	A	T
185 | 3	snp81367	0	145622	T	C
186 | 3	snp81368	0	146508	G	A
187 | 3	snp81369	0	149369	A	G
188 | 3	snp81370	0	150938	G	A
189 | 3	snp81371	0	154973	A	G
190 | 3	snp81372	0	156664	A	C
191 | 3	snp81373	0	161029	T	G
192 | 3	snp81374	0	162240	A	G
193 | 3	snp81375	0	162816	A	G
194 | 3	snp81376	0	163128	G	A
195 | 3	snp81377	0	164144	A	G
196 | 3	snp81378	0	165230	A	G
197 | 3	snp81379	0	165603	T	C
198 | 3	snp81380	0	165875	A	G
199 | 3	snp81381	0	166305	T	G
200 | 3	snp81382	0	166491	C	T
201 | 3	snp81383	0	166552	T	C
202 | 3	snp81384	0	167243	G	A
203 | 3	snp81385	0	167947	G	A
204 | 3	snp81386	0	168154	C	G
205 | 3	snp81387	0	168630	G	C
206 | 3	snp81388	0	170811	C	A
207 | 3	snp81389	0	175659	C	T
208 | 3	snp81390	0	177200	A	C
209 | 3	snp81391	0	178043	T	C
210 | 3	snp81392	0	178071	C	T
211 | 3	snp81393	0	178789	T	C
212 | 3	snp81394	0	179977	T	C
213 | 3	snp81395	0	180363	G	A
214 | 3	snp81396	0	180526	T	C
215 | 3	snp81397	0	181790	G	A
216 | 3	snp81398	0	181838	A	G
217 | 3	snp81399	0	184000	A	G
218 | 3	snp81400	0	184954	T	C
219 | 3	snp81401	0	185878	G	C
220 | 3	snp81402	0	186033	A	T
221 | 3	snp81403	0	186194	G	A
222 | 3	snp81404	0	188039	A	C
223 | 3	snp81405	0	188837	G	C
224 | 3	snp81406	0	189030	T	C
225 | 3	snp81407	0	189227	A	G
226 | 3	snp81408	0	189479	G	A
227 | 3	snp81409	0	189928	G	A
228 | 3	snp81410	0	190615	T	C
229 | 3	snp81411	0	193541	G	A
230 | 3	snp81412	0	194175	A	G
231 | 3	snp81413	0	195339	T	C
232 | 3	snp81414	0	195886	C	G
233 | 3	snp81415	0	198118	T	A
234 | 3	snp81416	0	198298	C	T
235 | 3	snp81417	0	198422	T	G
236 | 3	snp81418	0	198638	T	A
237 | 3	snp81419	0	198824	G	A
238 | 3	snp81420	0	199212	T	C
239 | 3	snp81421	0	199376	C	A
240 | 3	snp81422	0	200230	G	T
241 | 3	snp81423	0	201334	G	T
242 | 3	snp81424	0	201857	T	C
243 | 3	snp81425	0	202256	T	A
244 | 3	snp81426	0	203271	C	T
245 | 3	snp81427	0	203611	T	A
246 | 3	snp81428	0	204358	C	T
247 | 3	snp81429	0	205067	C	A
248 | 3	snp81430	0	205339	A	T
249 | 3	snp81431	0	205627	T	A
250 | 3	snp81432	0	205691	A	C
251 | 3	snp81433	0	206921	C	A
252 | 3	snp81434	0	207101	C	T
253 | 3	snp81435	0	207454	T	C
254 | 3	snp81436	0	207898	G	A
255 | 3	snp81437	0	208329	T	C
256 | 3	snp81438	0	209073	A	T
257 | 3	snp81439	0	209244	C	G
258 | 3	snp81440	0	209307	T	A
259 | 3	snp81441	0	209571	G	T
260 | 3	snp81442	0	209717	A	G
261 | 3	snp81443	0	210977	G	A
262 | 3	snp81444	0	212444	C	T
263 | 3	snp81445	0	212490	G	T
264 | 3	snp81446	0	212719	T	A
265 | 3	snp81447	0	213305	C	A
266 | 3	snp81448	0	213740	G	C
267 | 3	snp81449	0	213989	G	C
268 | 3	snp81450	0	214212	A	C
269 | 3	snp81451	0	214302	T	C
270 | 3	snp81452	0	214523	T	A
271 | 3	snp81453	0	214737	C	A
272 | 3	snp81454	0	216126	G	A
273 | 3	snp81455	0	216222	A	C
274 | 3	snp81456	0	217405	A	C
275 | 3	snp81457	0	217753	C	T
276 | 3	snp81458	0	218241	G	T
277 | 3	snp81459	0	219118	G	T
278 | 3	snp81460	0	219451	C	T
279 | 3	snp81461	0	220186	G	C
280 | 3	snp81462	0	220342	C	T
281 | 3	snp81463	0	220610	A	G
282 | 3	snp81464	0	220761	T	G
283 | 3	snp81465	0	221049	T	A
284 | 3	snp81466	0	221262	G	A
285 | 3	snp81467	0	221359	T	C
286 | 3	snp81468	0	221593	T	A
287 | 3	snp81469	0	221880	C	T
288 | 3	snp81470	0	222522	A	T
289 | 3	snp81471	0	222780	A	G
290 | 3	snp81472	0	222802	C	A
291 | 3	snp81473	0	223018	G	A
292 | 3	snp81474	0	223088	T	A
293 | 3	snp81475	0	223271	C	A
294 | 3	snp81476	0	223463	C	T
295 | 3	snp81477	0	223495	C	G
296 | 3	snp81478	0	223734	T	C
297 | 3	snp81479	0	223794	C	T
298 | 3	snp81480	0	223958	T	C
299 | 3	snp81481	0	223989	C	T
300 | 3	snp81482	0	224644	A	C
301 | 


--------------------------------------------------------------------------------
/inst/extdata/chr3.fam:
--------------------------------------------------------------------------------
  1 | 5837 5837 0 0 0 -9
  2 | 6008 6008 0 0 0 -9
  3 | 6009 6009 0 0 0 -9
  4 | 6016 6016 0 0 0 -9
  5 | 6040 6040 0 0 0 -9
  6 | 6042 6042 0 0 0 -9
  7 | 6043 6043 0 0 0 -9
  8 | 6046 6046 0 0 0 -9
  9 | 6064 6064 0 0 0 -9
 10 | 6074 6074 0 0 0 -9
 11 | 6243 6243 0 0 0 -9
 12 | 6709 6709 0 0 0 -9
 13 | 6897 6897 0 0 0 -9
 14 | 6898 6898 0 0 0 -9
 15 | 6899 6899 0 0 0 -9
 16 | 6900 6900 0 0 0 -9
 17 | 6901 6901 0 0 0 -9
 18 | 6903 6903 0 0 0 -9
 19 | 6904 6904 0 0 0 -9
 20 | 6905 6905 0 0 0 -9
 21 | 6906 6906 0 0 0 -9
 22 | 6907 6907 0 0 0 -9
 23 | 6908 6908 0 0 0 -9
 24 | 6909 6909 0 0 0 -9
 25 | 6910 6910 0 0 0 -9
 26 | 6911 6911 0 0 0 -9
 27 | 6913 6913 0 0 0 -9
 28 | 6914 6914 0 0 0 -9
 29 | 6915 6915 0 0 0 -9
 30 | 6916 6916 0 0 0 -9
 31 | 6917 6917 0 0 0 -9
 32 | 6918 6918 0 0 0 -9
 33 | 6919 6919 0 0 0 -9
 34 | 6920 6920 0 0 0 -9
 35 | 6921 6921 0 0 0 -9
 36 | 6922 6922 0 0 0 -9
 37 | 6923 6923 0 0 0 -9
 38 | 6924 6924 0 0 0 -9
 39 | 6926 6926 0 0 0 -9
 40 | 6927 6927 0 0 0 -9
 41 | 6928 6928 0 0 0 -9
 42 | 6929 6929 0 0 0 -9
 43 | 6930 6930 0 0 0 -9
 44 | 6931 6931 0 0 0 -9
 45 | 6932 6932 0 0 0 -9
 46 | 6933 6933 0 0 0 -9
 47 | 6936 6936 0 0 0 -9
 48 | 6937 6937 0 0 0 -9
 49 | 6939 6939 0 0 0 -9
 50 | 6940 6940 0 0 0 -9
 51 | 6942 6942 0 0 0 -9
 52 | 6943 6943 0 0 0 -9
 53 | 6944 6944 0 0 0 -9
 54 | 6945 6945 0 0 0 -9
 55 | 6946 6946 0 0 0 -9
 56 | 6951 6951 0 0 0 -9
 57 | 6956 6956 0 0 0 -9
 58 | 6957 6957 0 0 0 -9
 59 | 6958 6958 0 0 0 -9
 60 | 6959 6959 0 0 0 -9
 61 | 6960 6960 0 0 0 -9
 62 | 6961 6961 0 0 0 -9
 63 | 6962 6962 0 0 0 -9
 64 | 6963 6963 0 0 0 -9
 65 | 6964 6964 0 0 0 -9
 66 | 6965 6965 0 0 0 -9
 67 | 6966 6966 0 0 0 -9
 68 | 6967 6967 0 0 0 -9
 69 | 6968 6968 0 0 0 -9
 70 | 6969 6969 0 0 0 -9
 71 | 6970 6970 0 0 0 -9
 72 | 6971 6971 0 0 0 -9
 73 | 6972 6972 0 0 0 -9
 74 | 6973 6973 0 0 0 -9
 75 | 6974 6974 0 0 0 -9
 76 | 6975 6975 0 0 0 -9
 77 | 6976 6976 0 0 0 -9
 78 | 6977 6977 0 0 0 -9
 79 | 6978 6978 0 0 0 -9
 80 | 6979 6979 0 0 0 -9
 81 | 6980 6980 0 0 0 -9
 82 | 6981 6981 0 0 0 -9
 83 | 6982 6982 0 0 0 -9
 84 | 6983 6983 0 0 0 -9
 85 | 6984 6984 0 0 0 -9
 86 | 6985 6985 0 0 0 -9
 87 | 6988 6988 0 0 0 -9
 88 | 7000 7000 0 0 0 -9
 89 | 7014 7014 0 0 0 -9
 90 | 7033 7033 0 0 0 -9
 91 | 7062 7062 0 0 0 -9
 92 | 7064 7064 0 0 0 -9
 93 | 7081 7081 0 0 0 -9
 94 | 7094 7094 0 0 0 -9
 95 | 7123 7123 0 0 0 -9
 96 | 7147 7147 0 0 0 -9
 97 | 7163 7163 0 0 0 -9
 98 | 7231 7231 0 0 0 -9
 99 | 7255 7255 0 0 0 -9
100 | 7275 7275 0 0 0 -9
101 | 7282 7282 0 0 0 -9
102 | 7296 7296 0 0 0 -9
103 | 7306 7306 0 0 0 -9
104 | 7323 7323 0 0 0 -9
105 | 7346 7346 0 0 0 -9
106 | 7418 7418 0 0 0 -9
107 | 7424 7424 0 0 0 -9
108 | 7438 7438 0 0 0 -9
109 | 7460 7460 0 0 0 -9
110 | 7461 7461 0 0 0 -9
111 | 7477 7477 0 0 0 -9
112 | 7514 7514 0 0 0 -9
113 | 7515 7515 0 0 0 -9
114 | 7516 7516 0 0 0 -9
115 | 7517 7517 0 0 0 -9
116 | 7518 7518 0 0 0 -9
117 | 7519 7519 0 0 0 -9
118 | 7520 7520 0 0 0 -9
119 | 7521 7521 0 0 0 -9
120 | 7522 7522 0 0 0 -9
121 | 7523 7523 0 0 0 -9
122 | 7524 7524 0 0 0 -9
123 | 7525 7525 0 0 0 -9
124 | 7526 7526 0 0 0 -9
125 | 8213 8213 0 0 0 -9
126 | 8214 8214 0 0 0 -9
127 | 8215 8215 0 0 0 -9
128 | 8222 8222 0 0 0 -9
129 | 8230 8230 0 0 0 -9
130 | 8231 8231 0 0 0 -9
131 | 8233 8233 0 0 0 -9
132 | 8235 8235 0 0 0 -9
133 | 8236 8236 0 0 0 -9
134 | 8237 8237 0 0 0 -9
135 | 8239 8239 0 0 0 -9
136 | 8240 8240 0 0 0 -9
137 | 8241 8241 0 0 0 -9
138 | 8242 8242 0 0 0 -9
139 | 8243 8243 0 0 0 -9
140 | 8245 8245 0 0 0 -9
141 | 8247 8247 0 0 0 -9
142 | 8249 8249 0 0 0 -9
143 | 8254 8254 0 0 0 -9
144 | 8256 8256 0 0 0 -9
145 | 8258 8258 0 0 0 -9
146 | 8259 8259 0 0 0 -9
147 | 8264 8264 0 0 0 -9
148 | 8265 8265 0 0 0 -9
149 | 8266 8266 0 0 0 -9
150 | 8270 8270 0 0 0 -9
151 | 8271 8271 0 0 0 -9
152 | 8274 8274 0 0 0 -9
153 | 8275 8275 0 0 0 -9
154 | 8283 8283 0 0 0 -9
155 | 8284 8284 0 0 0 -9
156 | 8285 8285 0 0 0 -9
157 | 8290 8290 0 0 0 -9
158 | 8296 8296 0 0 0 -9
159 | 8297 8297 0 0 0 -9
160 | 8300 8300 0 0 0 -9
161 | 8306 8306 0 0 0 -9
162 | 8310 8310 0 0 0 -9
163 | 8311 8311 0 0 0 -9
164 | 8312 8312 0 0 0 -9
165 | 8313 8313 0 0 0 -9
166 | 8314 8314 0 0 0 -9
167 | 8323 8323 0 0 0 -9
168 | 8325 8325 0 0 0 -9
169 | 8326 8326 0 0 0 -9
170 | 8329 8329 0 0 0 -9
171 | 8334 8334 0 0 0 -9
172 | 8335 8335 0 0 0 -9
173 | 8337 8337 0 0 0 -9
174 | 8343 8343 0 0 0 -9
175 | 8351 8351 0 0 0 -9
176 | 8353 8353 0 0 0 -9
177 | 8354 8354 0 0 0 -9
178 | 8357 8357 0 0 0 -9
179 | 8365 8365 0 0 0 -9
180 | 8366 8366 0 0 0 -9
181 | 8369 8369 0 0 0 -9
182 | 8374 8374 0 0 0 -9
183 | 8376 8376 0 0 0 -9
184 | 8378 8378 0 0 0 -9
185 | 8387 8387 0 0 0 -9
186 | 8388 8388 0 0 0 -9
187 | 8389 8389 0 0 0 -9
188 | 8395 8395 0 0 0 -9
189 | 8411 8411 0 0 0 -9
190 | 8412 8412 0 0 0 -9
191 | 8420 8420 0 0 0 -9
192 | 8422 8422 0 0 0 -9
193 | 8423 8423 0 0 0 -9
194 | 8424 8424 0 0 0 -9
195 | 8426 8426 0 0 0 -9
196 | 8430 8430 0 0 0 -9
197 | 9057 9057 0 0 0 -9
198 | 9058 9058 0 0 0 -9
199 | 100000 100000 0 0 0 -9
200 | 


--------------------------------------------------------------------------------
/inst/extdata/pheno.txt:
--------------------------------------------------------------------------------
  1 | FID IID FT10
  2 | 6960 6960 47.0
  3 | 7517 7517 107.0
  4 | 6945 6945 55.0
  5 | 6914 6914 73.0
  6 | 6944 6944 49.0
  7 | 6939 6939 49.0
  8 | 6009 6009 98.0
  9 | 7518 7518 103.0
 10 | 6046 6046 93.0
 11 | 8325 8325 49.0
 12 | 7123 7123 59.5
 13 | 6899 6899 54.0
 14 | 8222 8222 90.0
 15 | 6968 6968 71.0
 16 | 7461 7461 61.0
 17 | 8423 8423 70.0
 18 | 6008 6008 60.0
 19 | 8313 8313 49.0
 20 | 8365 8365 51.0
 21 | 9058 9058 101.0
 22 | 8335 8335 104.0
 23 | 8343 8343 62.0
 24 | 8374 8374 59.0
 25 | 6956 6956 69.0
 26 | 6909 6909 51.0
 27 | 6042 6042 56.0
 28 | 6064 6064 96.0
 29 | 7525 7525 46.0
 30 | 6961 6961 46.0
 31 | 6974 6974 103.0
 32 | 7306 7306 60.0
 33 | 7255 7255 46.0
 34 | 7418 7418 63.0
 35 | 6928 6928 55.0
 36 | 7094 7094 58.5
 37 | 6900 6900 90.0
 38 | 8387 8387 52.0
 39 | 8247 8247 87.0
 40 | 7062 7062 46.0
 41 | 7282 7282 51.0
 42 | 8422 8422 106.0
 43 | 6985 6985 56.0
 44 | 6915 6915 53.0
 45 | 6958 6958 49.0
 46 | 6016 6016 75.0
 47 | 6929 6929 71.0
 48 | 7520 7520 60.0
 49 | 6933 6933 56.0
 50 | 8424 8424 46.0
 51 | 6926 6926 49.0
 52 | 7346 7346 64.0
 53 | 7296 7296 70.0
 54 | 7522 7522 83.0
 55 | 8369 8369 76.0
 56 | 6932 6932 51.0
 57 | 8237 8237 97.0
 58 | 7000 7000 65.0
 59 | 7477 7477 59.0
 60 | 8378 8378 56.0
 61 | 8329 8329 46.0
 62 | 6917 6917 121.0
 63 | 7460 7460 49.0
 64 | 6920 6920 71.0
 65 | 7081 7081 46.0
 66 | 8270 8270 49.0
 67 | 6980 6980 51.0
 68 | 8271 8271 49.0
 69 | 8230 8230 97.0
 70 | 5837 5837 57.0
 71 | 8310 8310 49.0
 72 | 8215 8215 51.0
 73 | 8258 8258 73.0
 74 | 6898 6898 41.0
 75 | 8411 8411 NA
 76 | 6910 6910 49.0
 77 | 7014 7014 92.0
 78 | 6927 6927 51.0
 79 | 6966 6966 53.0
 80 | 6959 6959 51.0
 81 | 8354 8354 70.0
 82 | 8264 8264 46.0
 83 | 6965 6965 102.0
 84 | 6973 6973 53.0
 85 | 6975 6975 51.0
 86 | 8236 8236 91.0
 87 | 8337 8337 70.0
 88 | 8420 8420 56.0
 89 | 8297 8297 73.0
 90 | 8231 8231 91.0
 91 | 8366 8366 NA
 92 | 8351 8351 78.0
 93 | 6943 6943 49.0
 94 | 6972 6972 63.0
 95 | 6942 6942 46.0
 96 | 6901 6901 86.0
 97 | 6936 6936 67.0
 98 | 8389 8389 63.0
 99 | 8395 8395 69.0
100 | 6918 6918 108.0
101 | 7033 7033 76.0
102 | 6976 6976 56.0
103 | 8239 8239 52.0
104 | 6040 6040 71.0
105 | 6919 6919 71.0
106 | 6981 6981 44.0
107 | 7516 7516 100.0
108 | 7147 7147 71.0
109 | 6969 6969 70.0
110 | 6921 6921 64.0
111 | 7524 7524 51.0
112 | 8241 8241 73.0
113 | 9057 9057 76.0
114 | 6979 6979 44.0
115 | 8326 8326 67.0
116 | 8412 8412 NA
117 | 8256 8256 61.0
118 | 7521 7521 60.0
119 | 6908 6908 49.0
120 | 8357 8357 NA
121 | 8296 8296 45.0
122 | 6946 6946 62.0
123 | 8242 8242 120.0
124 | 7231 7231 46.0
125 | 8284 8284 61.0
126 | 6962 6962 52.0
127 | 8235 8235 60.0
128 | 8353 8353 41.0
129 | 8259 8259 73.0
130 | 6923 6923 44.0
131 | 6906 6906 43.0
132 | 6967 6967 44.0
133 | 8285 8285 70.0
134 | 6970 6970 48.0
135 | 8240 8240 93.0
136 | 7064 7064 79.0
137 | 8306 8306 96.0
138 | 7519 7519 76.0
139 | 8274 8274 74.0
140 | 8283 8283 71.0
141 | 6916 6916 63.0
142 | 6924 6924 49.0
143 | 7515 7515 49.0
144 | 6043 6043 90.0
145 | 7526 7526 53.0
146 | 8243 8243 66.0
147 | 8300 8300 61.0
148 | 7514 7514 58.0
149 | 6911 6911 46.0
150 | 100000 100000 58.0
151 | 8388 8388 60.0
152 | 8275 8275 68.0
153 | 6931 6931 46.0
154 | 7275 7275 46.0
155 | 6983 6983 71.0
156 | 7163 7163 57.0
157 | 7438 7438 75.0
158 | 6963 6963 60.0
159 | 8334 8334 64.0
160 | 6951 6951 68.0
161 | 8430 8430 NA
162 | 6930 6930 49.0
163 | 8214 8214 51.0
164 | 8290 8290 50.0
165 | 8426 8426 49.0
166 | 8323 8323 51.0
167 | 6897 6897 62.0
168 | 8249 8249 81.0
169 | 6922 6922 48.0
170 | 8376 8376 84.0
171 | 6709 6709 52.0
172 | 8213 8213 44.0
173 | 8254 8254 52.0
174 | 8311 8311 49.0
175 | 6977 6977 49.0
176 | 6957 6957 84.0
177 | 6978 6978 49.0
178 | 8245 8245 46.0
179 | 6913 6913 99.0
180 | 6971 6971 51.0
181 | 6964 6964 93.0
182 | 6074 6074 91.0
183 | 6905 6905 65.0
184 | 8233 8233 59.0
185 | 7323 7323 56.0
186 | 6982 6982 49.0
187 | 8312 8312 66.0
188 | 6937 6937 65.0
189 | 6984 6984 53.0
190 | 6243 6243 56.0
191 | 7424 7424 43.0
192 | 7523 7523 57.0
193 | 6988 6988 48.0
194 | 8266 8266 99.0
195 | 6903 6903 57.0
196 | 6907 6907 58.0
197 | 8314 8314 64.0
198 | 6904 6904 66.0
199 | 8265 8265 44.0
200 | 6940 6940 49.0
201 | 


--------------------------------------------------------------------------------
/man/BGData-class.Rd:
--------------------------------------------------------------------------------
 1 | \docType{class}
 2 | \name{BGData-class}
 3 | \alias{BGData-class}
 4 | \alias{geno,BGData-method}
 5 | \alias{geno<-,BGData-method}
 6 | \alias{pheno,BGData-method}
 7 | \alias{pheno<-,BGData-method}
 8 | \alias{map,BGData-method}
 9 | \alias{map<-,BGData-method}
10 | \title{Container for Phenotype and Genotype Data}
11 | \description{
12 |     The BGData class is a container for genotypes, sample information, and
13 |     variant information. The class is inspired by the \code{.bed/.fam/.bim}
14 |     (binary) and \code{.ped/.fam/.map} (text) phenotype/genotype file formats
15 |     of \href{https://www.cog-genomics.org/plink2}{PLINK}. It is used by several
16 |     functions of this package such as \code{GWAS} for performing a Genome Wide
17 |     Association Study or \code{getG} for calculating a genomic relationship
18 |     matrix.
19 | }
20 | \details{
21 |     There are several ways to create an instance of this class:
22 | 
23 |     \itemize{
24 |         \item from arbitrary phenotype/genotype data using the \code{BGData}
25 |             constructor function.
26 |         \item from a .bed file using \code{as.BGData} and \code{BEDMatrix}.
27 |         \item from a previously saved \code{BGData} object using
28 |             \code{load.BGData}.
29 |         \item from multiple files (even a mixture of different file types)
30 |             using \code{LinkedMatrix}.
31 |         \item from a .raw file (or a .ped-like file) using
32 |             \code{readRAW}, \code{readRAW_matrix}, or
33 |             \code{readRAW_big.matrix}.
34 |     }
35 | 
36 |     A .ped file can be recoded to a .raw file in
37 |     \href{https://www.cog-genomics.org/plink2}{PLINK} using \code{plink --file
38 |     myfile --recodeA}, or converted to a .bed file using \code{plink --file
39 |     myfile --make-bed}. Conversely, a .bed file can be transformed back to a
40 |     .ped file using \code{plink --bfile myfile --recode} or to a .raw file
41 |     using \code{plink --bfile myfile --recodeA} without losing information.
42 | }
43 | \section{Accessors}{
44 |     In the following code snippets, \code{x} is a BGData object.
45 |     \describe{
46 |         \item{\code{geno(x)}, \code{geno(x) <- value}:}{
47 |             Get or set genotypes.
48 |         }
49 |         \item{\code{pheno(x)}, \code{pheno(x) <- value}:}{
50 |             Get or set sample information.
51 |         }
52 |         \item{\code{map(x)}, \code{map(x) <- value}:}{
53 |             Get or set variant information.
54 |         }
55 |     }
56 | }
57 | \seealso{
58 |     \code{\link{BGData}}, \code{\link{as.BGData}}, \code{\link{load.BGData}},
59 |     \code{\link{readRAW}} to create \code{BGData} objects.
60 | 
61 |     \code{\link[LinkedMatrix]{LinkedMatrix-class}} and
62 |     \code{\link[BEDMatrix]{BEDMatrix-class}} for more information on the above
63 |     mentioned classes.
64 | }
65 | \examples{
66 | X <- matrix(data = rnorm(100), nrow = 10, ncol = 10)
67 | Y <- data.frame(y = runif(10))
68 | MAP <- data.frame(means = colMeans(X), freqNA = colMeans(is.na(X)))
69 | DATA <- BGData(geno = X, pheno = Y, map = MAP)
70 | 
71 | dim(geno(DATA))
72 | head(pheno(DATA))
73 | head(map(DATA))
74 | }
75 | 


--------------------------------------------------------------------------------
/man/BGData-package.Rd:
--------------------------------------------------------------------------------
 1 | \docType{package}
 2 | \name{BGData-package}
 3 | \alias{BGData-package}
 4 | \title{A Suite of Packages for Analysis of Big Genomic Data}
 5 | \description{
 6 |     Modern genomic datasets are big (large \emph{n}), high-dimensional (large
 7 |     \emph{p}), and multi-layered. The challenges that need to be addressed are
 8 |     memory requirements and computational demands. Our goal is to develop
 9 |     software that will enable researchers to carry out analyses with big
10 |     genomic data within the R environment.
11 | }
12 | \details{
13 |     We have identified several approaches to tackle those challenges within R:
14 | 
15 |     \itemize{
 16 |         \item File-backed matrices: The data is stored on the hard drive and
17 |             users can read in smaller chunks when they are needed.
18 |         \item Linked arrays: For very large datasets a single file-backed array
19 |             may not be enough or convenient. A linked array is an array whose
20 |             content is distributed over multiple file-backed nodes.
21 |         \item Multiple dispatch: Methods are presented to users so that they
22 |             can treat these arrays pretty much as if they were RAM arrays.
23 |         \item Multi-level parallelism: Exploit multi-core and multi-node
24 |             computing.
25 |         \item Inputs: Users can create these arrays from standard formats
26 |             (e.g., PLINK .bed).
27 |     }
28 | 
29 |     The \code{BGData} package is an umbrella package that comprises several
30 |     packages: \code{BEDMatrix}, \code{LinkedMatrix}, and \code{symDMatrix}. It
31 |     features scalable and efficient computational methods for large genomic
32 |     datasets such as genome-wide association studies (GWAS) or genomic
33 |     relationship matrices (G matrix). It also contains a container class called
34 |     \code{BGData} that holds genotypes, sample information, and variant
35 |     information.
36 | }
37 | \section{Example dataset}{
38 |     The \code{extdata} folder contains example files that were generated from
39 |     the 250k SNP and phenotype data in
40 |     \href{https://www.nature.com/articles/nature08800}{Atwell et al. (2010)}.
 41 |     Only the first 300 SNPs of chromosomes 1, 2, and 3 were included to keep the
42 |     size of the example dataset small.
43 |     \href{https://www.cog-genomics.org/plink2}{PLINK} was used to convert the
44 |     data to \href{https://www.cog-genomics.org/plink2/input#bed}{.bed} and
45 |     \href{https://www.cog-genomics.org/plink2/input#raw}{.raw} files.
46 |     \code{FT10} has been chosen as a phenotype and is provided as an
47 |     \href{https://www.cog-genomics.org/plink2/input#pheno}{alternate phenotype
48 |     file}. The file is intentionally shuffled to demonstrate that the
49 |     additional phenotypes are put in the same order as the rest of the
50 |     phenotypes.
51 | }
52 | \seealso{
53 |     \code{\link[BEDMatrix]{BEDMatrix-package}},
54 |     \code{\link[LinkedMatrix]{LinkedMatrix-package}}, and
55 |     \code{\link[symDMatrix]{symDMatrix-package}} for an introduction to the
56 |     respective packages.
57 | 
58 |     \code{\link{file-backed-matrices}} for more information on file-backed
59 |     matrices. \code{\link{multi-level-parallelism}} for more information on
60 |     multi-level parallelism.
61 | }
62 | 


--------------------------------------------------------------------------------
/man/BGData.Rd:
--------------------------------------------------------------------------------
 1 | \name{BGData}
 2 | \alias{BGData}
 3 | \title{Creates a New BGData Instance}
 4 | \description{
 5 |     This function constructs a new \code{BGData} object.
 6 | }
 7 | \usage{
 8 | BGData(geno, pheno = NULL, map = NULL)
 9 | }
10 | \arguments{
11 |     \item{geno}{
12 |         A \code{geno} object that contains genotypes.
13 |     }
14 |     \item{pheno}{
15 |         A \code{data.frame} that contains sample information (including
16 |         phenotypes). A stub that only contains a \code{sample_id} column
17 |         populated with either the rownames of \code{geno} or a sequence
18 |         starting with \code{sample_} will be generated if \code{NULL}
19 |     }
20 |     \item{map}{
21 |         A \code{data.frame} that contains variant information. A stub that only
22 |         contains a \code{variant_id} column populated with either the colnames
23 |         of \code{geno} or a sequence starting with \code{variant_} will be
 24 |         generated if \code{NULL}.
25 |     }
26 | }
27 | \seealso{
28 |      \code{\link{BGData-class}} and \code{\link{geno-class}} for more
29 |      information on the above mentioned classes.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/FWD.Rd:
--------------------------------------------------------------------------------
 1 | \name{FWD}
 2 | \alias{FWD}
 3 | \title{Performs Forward Regressions}
 4 | \description{
 5 |     Performs forward regression of \code{y} on the columns of \code{X}.
 6 |     Predictors are added, one at a time, each time adding the one that produces
 7 |     the largest reduction in the residual sum of squares (RSS). The function
 8 |     returns estimates and summaries for the entire forward search. This
 9 |     function performs a similar search than that of \code{step(,
10 |         direction='forward')}, however, \code{FWD()} is optimized for
11 |     computational speed for linear models with very large sample size. To
12 |     achieve fast computations, the software first computes the sufficient
13 |     statistics X'X and X'y. At each step, the function first finds the
14 |     predictor that produces the largest reduction in the sum of squares (this
15 |     can be derived from X'X, X'y and the current solution of effects), and then
16 |     updates the estimates of effects for the resulting model using Gauss Seidel
17 |     iterations performed on the linear system (X'X)b=X'y, iterating only over
18 |     the elements of b that are active in the model.
19 | }
20 | \usage{
21 | FWD(y, X, df = 20, tol = 1e-7, maxIter = 1000, centerImpute = TRUE,
22 |     verbose = TRUE)
23 | }
24 | \arguments{
25 |     \item{y}{
26 |         The response vector (numeric nx1).
27 |     }
28 |     \item{X}{
29 |         An (nxp) numeric matrix. Columns are the features (aka predictors)
30 |         considered in the forward search. The rows of \code{X} must be matched
31 |         to the entries of \code{y}.
32 |     }
33 |     \item{df}{
34 |         Defines the maximum number of predictors to be included in the model.
35 |         For complete forward search, set \code{df = ncol(X)}.
36 |     }
37 |     \item{tol}{
38 |         A tolerance parameter to control when to stop the Gauss Seidel
39 |         algorithm.
40 |     }
41 |     \item{maxIter}{
42 |         The maximum number of iterations for the Gauss Seidel algorithm (only
43 |         used when the algorithm is not stopped by the tolerance parameter).
44 |     }
45 |     \item{centerImpute}{
46 |         Whether to center the columns of \code{X} and impute the missing values
47 |         with the column means.
48 |     }
49 |     \item{verbose}{
50 |         Use \code{verbose = TRUE} to print summaries of the forward search.
51 |     }
52 | }
53 | \value{
54 |     A list with two entries:
55 |     \itemize{
56 |         \item \code{B}: (pxdf+1) includes the estimated effects for each
57 |         predictor (rows) at each step of the forward search (df, in columns).
58 |         \item \code{path}: A data frame providing the order in which variables
59 |         were added to the model (\code{variable}) and statistics for each step
60 |         of the forward search (\code{RSS}, \code{LogLik}, \code{VARE} (the
61 |         residual variance), \code{DF}, \code{AIC}, and \code{BIC}).
62 |     }
63 | }
64 | 


--------------------------------------------------------------------------------
/man/GWAS.Rd:
--------------------------------------------------------------------------------
  1 | \name{GWAS}
  2 | \alias{GWAS}
  3 | \title{Performs Single Marker Regressions Using BGData Objects}
  4 | \description{
  5 |     Implements single marker regressions. The regression model includes all the
  6 |     covariates specified in the right-hand-side of the \code{formula} plus one
  7 |     column of the genotypes at a time. The data for the association tests is
  8 |     obtained from a \code{BGData} object.
  9 | }
 10 | \usage{
 11 | GWAS(formula, data, method = "lsfit", i = seq_len(nrow(geno(data))),
 12 |   j = seq_len(ncol(geno(data))), chunkSize = 5000L,
 13 |   nCores = getOption("mc.cores", 2L), verbose = FALSE, ...)
 14 | }
 15 | \arguments{
 16 |     \item{formula}{
 17 |         The formula for the GWAS model without the variant, e.g. \code{y ~ 1}
 18 |         or \code{y ~ factor(sex) + age}. The variables included in the formula
 19 |         must be column names in the sample information of the \code{BGData}
 20 |         object.
 21 |     }
 22 |     \item{data}{
 23 |         A \code{BGData} object.
 24 |     }
 25 |     \item{method}{
 26 |         The regression method to be used. Currently, the following methods are
 27 |         implemented: \code{rayOLS} (see below), \code{lsfit}, \code{lm},
 28 |         \code{lm.fit}, \code{glm}, \code{lmer}, and \code{SKAT}. Defaults to
 29 |         \code{lsfit}.
 30 |     }
 31 |     \item{i}{
 32 |         Indicates which rows of the genotypes should be used. Can be integer,
 33 |         boolean, or character. By default, all rows are used.
 34 |     }
 35 |     \item{j}{
 36 |         Indicates which columns of the genotypes should be used. Can be
 37 |         integer, boolean, or character. By default, all columns are used.
 38 |     }
 39 |     \item{chunkSize}{
 40 |         The number of columns of the genotypes that are brought into physical
 41 |         memory for processing per core. If \code{NULL}, all elements in
 42 |         \code{j} are used. Defaults to 5000.
 43 |     }
 44 |     \item{nCores}{
 45 |         The number of cores (passed to \code{mclapply}). Defaults to the value
 46 |         of the \code{mc.cores} option, or 2 if that option is not set.
 47 |     }
 48 |     \item{verbose}{
 49 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
 50 |     }
 51 |     \item{...}{
 52 |         Additional arguments for chunkedApply and regression method.
 53 |     }
 54 | }
 55 | \details{
 56 |     The \code{rayOLS} method is a regression through the origin that can only
 57 |     be used with a \code{y ~ 1} formula, i.e. it only allows for one
 58 |     quantitative response variable \code{y} and one variant at a time as an
 59 |     explanatory variable (the variant is not included in the formula, hence
 60 |     \code{1} is used as a dummy). If covariates are needed, consider
 61 |     preadjustment of \code{y}. Among the provided methods, it is by far the
 62 |     fastest.
 63 | 
 64 |     Some regression methods may require the data to not contain columns with
 65 |     variance 0 or too many missing values. We suggest running \code{summarize}
 66 |     to detect variants that do not clear the desired minor-allele frequency and
 67 |     rate of missing genotype calls, and filtering these variants out using the
 68 |     \code{j} parameter of the \code{GWAS} function (see example below).
 69 | }
 70 | \value{
 71 |     The same matrix that would be returned by \code{coef(summary(model))}.
 72 | }
 73 | \seealso{
 74 |     \code{\link{file-backed-matrices}} for more information on file-backed
 75 |     matrices. \code{\link{multi-level-parallelism}} for more information on
 76 |     multi-level parallelism. \code{\link{BGData-class}} for more information on
 77 |     the \code{BGData} class. \code{\link[stats]{lsfit}},
 78 |     \code{\link[stats]{lm}}, \code{\link[stats]{lm.fit}},
 79 |     \code{\link[stats]{glm}}, \code{\link[lme4]{lmer}}, and
 80 |     \code{\link[SKAT]{SKAT}} for more information on regression methods.
 81 | }
 82 | \examples{
 83 | # Restrict number of cores to 1 on Windows
 84 | if (.Platform$OS.type == "windows") {
 85 |     options(mc.cores = 1)
 86 | }
 87 | 
 88 | # Load example data
 89 | bg <- BGData:::loadExample()
 90 | 
 91 | # Detect variants that do not pass MAF and missingness thresholds
 92 | summaries <- summarize(geno(bg))
 93 | maf <- ifelse(summaries$allele_freq > 0.5, 1 - summaries$allele_freq,
 94 |     summaries$allele_freq)
 95 | exclusions <- maf < 0.01 | summaries$freq_na > 0.05
 96 | 
 97 | # Perform a single marker regression
 98 | res1 <- GWAS(formula = FT10 ~ 1, data = bg, j = !exclusions)
 99 | 
100 | # Draw a Manhattan plot
101 | plot(-log10(res1[, 4]))
102 | 
103 | # Use lm instead of lsfit (the default)
104 | res2 <- GWAS(formula = FT10 ~ 1, data = bg, method = "lm", j = !exclusions)
105 | 
106 | # Use glm instead of lsfit (the default)
107 | y <- pheno(bg)$FT10
108 | pheno(bg)$FT10.01 <- y > quantile(y, 0.8, na.rm = TRUE)
109 | res3 <- GWAS(formula = FT10.01 ~ 1, data = bg, method = "glm", j = !exclusions)
110 | 
111 | # Perform a single marker regression on the first 50 markers (useful for
112 | # distributed computing)
113 | res4 <- GWAS(formula = FT10 ~ 1, data = bg, j = 1:50)
114 | }
115 | 


--------------------------------------------------------------------------------
/man/as.BGData.Rd:
--------------------------------------------------------------------------------
 1 | \name{as.BGData}
 2 | \alias{as.BGData}
 3 | \alias{as.BGData.BEDMatrix}
 4 | \alias{as.BGData.ColumnLinkedMatrix}
 5 | \alias{as.BGData.RowLinkedMatrix}
 6 | \title{Convert Other Objects to BGData Objects}
 7 | \description{
 8 |     Converts other objects to \code{BGData} objects by loading supplementary
 9 |     phenotypes and map files referenced by the object to be used for the sample
10 |     information and variant information, respectively.
11 | 
12 |     Currently supported are \code{BEDMatrix} objects, plain or nested in
13 |     \code{ColumnLinkedMatrix} objects.
14 | }
15 | \usage{
16 | as.BGData(x, alternatePhenotypeFile = NULL, ...)
17 | 
18 | \method{as.BGData}{BEDMatrix}(x, alternatePhenotypeFile = NULL, ...)
19 | 
20 | \method{as.BGData}{ColumnLinkedMatrix}(x, alternatePhenotypeFile = NULL,
21 |   ...)
22 | 
23 | \method{as.BGData}{RowLinkedMatrix}(x, alternatePhenotypeFile = NULL,
24 |   ...)
25 | }
26 | \arguments{
27 |     \item{x}{
28 |         An object. Currently supported are \code{BEDMatrix} objects, plain or
29 |         nested in \code{ColumnLinkedMatrix} objects.
30 |     }
31 |     \item{alternatePhenotypeFile}{
32 |         Path to an
33 |         \href{https://www.cog-genomics.org/plink2/input#pheno}{alternate
34 |         phenotype file}.
35 |     }
36 |     \item{...}{
37 |         Additional arguments to the \code{read.table} or \code{fread} (if the
38 |         data.table package is installed) call that parses the alternate
39 |         phenotype file.
40 |     }
41 | }
42 | \details{
43 |     The .ped and .raw formats only allow for a single phenotype. If more
44 |     phenotypes are required it is possible to store them in an
45 |     \href{https://www.cog-genomics.org/plink2/input#pheno}{alternate phenotype
46 |     file}. The path to such a file can be provided with
47 |     \code{alternatePhenotypeFile} and will be merged with the existing sample
48 |     information. The first and second columns of that file must contain family
49 |     and within-family IDs, respectively.
50 | 
51 |     For \code{BEDMatrix} objects: If a .fam file (which corresponds to the
52 |     first six columns of a .ped or .raw file) of the same name and in the same
53 |     directory as the .bed file exists, the sample information will be populated
54 |     with the data stored in that file. Otherwise a stub that only contains an
55 |     \code{IID} column populated with the rownames of \code{geno(x)} will be
56 |     generated. The same will happen for a .bim file for the variant
57 |     information.
58 | 
59 |     For \code{ColumnLinkedMatrix} objects: See the case for \code{BEDMatrix}
60 |     objects, but only the .fam file of the first node of the
61 |     \code{LinkedMatrix} will be read and used for the sample information, and
62 |     the .bim files of all nodes will be combined and used for the variant
63 |     information.
64 | }
65 | \value{
66 |     A \code{BGData} object.
67 | }
68 | \seealso{
69 |     \code{\link[=readRAW]{readRAW()}} to convert text files to \code{BGData}
70 |     objects. \code{\link{BGData-class}},
71 |     \code{\link[BEDMatrix]{BEDMatrix-class}},
72 |     \code{\link[LinkedMatrix]{ColumnLinkedMatrix-class}} for more information
73 |     on the above mentioned classes. \code{\link[utils]{read.table}} and
74 |     \code{\link[data.table]{fread}} to learn more about extra arguments that
75 |     can be passed via \code{...}.
76 | }
77 | \examples{
78 | # Path to example data
79 | path <- system.file("extdata", package = "BGData")
80 | 
81 | # Convert a single BEDMatrix object to a BGData object
82 | chr1 <- BEDMatrix::BEDMatrix(paste0(path, "/chr1.bed"))
83 | bg1 <- as.BGData(chr1)
84 | 
85 | # Convert multiple BEDMatrix objects in a ColumnLinkedMatrix to a BGData object
86 | chr2 <- BEDMatrix::BEDMatrix(paste0(path, "/chr2.bed"))
87 | chr3 <- BEDMatrix::BEDMatrix(paste0(path, "/chr3.bed"))
88 | clm <- ColumnLinkedMatrix(chr1, chr2, chr3)
89 | bg2 <- as.BGData(clm)
90 | 
91 | # Load additional (alternate) phenotypes
92 | bg3 <- as.BGData(clm, alternatePhenotypeFile = paste0(path, "/pheno.txt"))
93 | }
94 | 


--------------------------------------------------------------------------------
/man/chunkedApply.Rd:
--------------------------------------------------------------------------------
 1 | \name{chunkedApply}
 2 | \alias{chunkedApply}
 3 | \title{Applies a Function on Each Row or Column of a File-Backed Matrix}
 4 | \description{
 5 |     Similar to \code{apply}, but designed for file-backed matrices. The
 6 |     function brings chunks of an object into physical memory by taking subsets,
 7 |     and applies a function on either the rows or the columns of the chunks
 8 |     using an optimized version of \code{apply}. If \code{nCores} is greater
 9 |     than 1, the function will be applied in parallel using \code{mclapply}. In
10 |     that case the subsets of the object are taken on the slaves.
11 | }
12 | \usage{
13 | chunkedApply(X, MARGIN, FUN, i = seq_len(nrow(X)),
14 |   j = seq_len(ncol(X)), chunkSize = 5000L,
15 |   nCores = getOption("mc.cores", 2L), verbose = FALSE, ...)
16 | }
17 | \arguments{
18 |     \item{X}{
19 |         A file-backed matrix, typically the genotypes of a \code{BGData}
20 |         object.
21 |     }
22 |     \item{MARGIN}{
23 |         The subscripts which the function will be applied over. 1 indicates
24 |         rows, 2 indicates columns.
25 |     }
26 |     \item{FUN}{
27 |         The function to be applied.
28 |     }
29 |     \item{i}{
30 |         Indicates which rows of \code{X} should be used. Can be integer,
31 |         boolean, or character. By default, all rows are used.
32 |     }
33 |     \item{j}{
34 |         Indicates which columns of \code{X} should be used. Can be integer,
35 |         boolean, or character. By default, all columns are used.
36 |     }
37 |     \item{chunkSize}{
38 |         The number of rows or columns of \code{X} that are brought into
39 |         physical memory for processing per core. If \code{NULL}, all elements
40 |         in \code{i} or \code{j} are used. Defaults to 5000.
41 |     }
42 |     \item{nCores}{
43 |         The number of cores (passed to \code{mclapply}). Defaults to the
44 |         value of the \code{mc.cores} option, or 2 if that option is not set.
45 |     }
46 |     \item{verbose}{
47 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
48 |     }
49 |     \item{...}{
50 |         Additional arguments to be passed to the \code{apply}-like function.
51 |     }
52 | }
53 | \seealso{
54 |     \code{\link{file-backed-matrices}} for more information on file-backed
55 |     matrices. \code{\link{multi-level-parallelism}} for more information on
56 |     multi-level parallelism. \code{\link{BGData-class}} for more information on
57 |     the \code{BGData} class.
58 | }
59 | \examples{
60 | # Restrict number of cores to 1 on Windows
61 | if (.Platform$OS.type == "windows") {
62 |     options(mc.cores = 1)
63 | }
64 | 
65 | # Load example data
66 | bg <- BGData:::loadExample()
67 | 
68 | # Compute standard deviation of columns
69 | chunkedApply(X = geno(bg), MARGIN = 2, FUN = sd)
70 | }
71 | 


--------------------------------------------------------------------------------
/man/chunkedMap.Rd:
--------------------------------------------------------------------------------
 1 | \name{chunkedMap}
 2 | \alias{chunkedMap}
 3 | \title{Applies a Function on Each Chunk of a File-Backed Matrix}
 4 | \description{
 5 |     Similar to \code{lapply}, but designed for file-backed matrices. The
 6 |     function brings chunks of an object into physical memory by taking subsets,
 7 |     and applies a function on them. If \code{nCores} is greater than 1, the
 8 |     function will be applied in parallel using \code{mclapply}. In that case
 9 |     the subsets of the object are taken on the slaves.
10 | }
11 | \usage{
12 | chunkedMap(X, FUN, i = seq_len(nrow(X)), j = seq_len(ncol(X)),
13 |   chunkBy = 2L, chunkSize = 5000L, nCores = getOption("mc.cores",
14 |   2L), verbose = FALSE, ...)
15 | }
16 | \arguments{
17 |     \item{X}{
18 |         A file-backed matrix, typically the genotypes of a \code{BGData}
19 |         object.
20 |     }
21 |     \item{FUN}{
22 |         The function to be applied on each chunk.
23 |     }
24 |     \item{i}{
25 |         Indicates which rows of \code{X} should be used. Can be integer,
26 |         boolean, or character. By default, all rows are used.
27 |     }
28 |     \item{j}{
29 |         Indicates which columns of \code{X} should be used. Can be integer,
30 |         boolean, or character. By default, all columns are used.
31 |     }
32 |     \item{chunkBy}{
33 |         Whether to extract chunks by rows (1) or by columns (2). Defaults to
34 |         columns (2).
35 |     }
36 |     \item{chunkSize}{
37 |         The number of rows or columns of \code{X} that are brought into
38 |         physical memory for processing per core. If \code{NULL}, all elements
39 |         in \code{i} or \code{j} are used. Defaults to 5000.
40 |     }
41 |     \item{nCores}{
42 |         The number of cores (passed to \code{mclapply}). Defaults to the
43 |         value of the \code{mc.cores} option, or 2 if that option is not set.
44 |     }
45 |     \item{verbose}{
46 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
47 |     }
48 |     \item{...}{
49 |         Additional arguments to be passed to the
50 |         \code{apply}-like function.
51 |     }
52 | }
53 | \seealso{
54 |     \code{\link{file-backed-matrices}} for more information on file-backed
55 |     matrices. \code{\link{multi-level-parallelism}} for more information on
56 |     multi-level parallelism. \code{\link{BGData-class}} for more information on
57 |     the \code{BGData} class.
58 | }
59 | \examples{
60 | # Restrict number of cores to 1 on Windows
61 | if (.Platform$OS.type == "windows") {
62 |     options(mc.cores = 1)
63 | }
64 | 
65 | # Load example data
66 | bg <- BGData:::loadExample()
67 | 
68 | # Compute column sums of each chunk
69 | chunkedMap(X = geno(bg), FUN = colSums)
70 | }
71 | 


--------------------------------------------------------------------------------
/man/file-backed-matrices.Rd:
--------------------------------------------------------------------------------
 1 | \name{file-backed-matrices}
 2 | \alias{file-backed-matrices}
 3 | \title{File-Backed Matrices}
 4 | \description{
 5 |     Functions with the \code{chunkSize} parameter work best with file-backed
 6 |     matrices such as \code{BEDMatrix} objects. To avoid loading the whole,
 7 |     potentially very large matrix into memory, these functions will load chunks
 8 |     of the file-backed matrix into memory and perform the operations on one
 9 |     chunk at a time. The size of the chunks is determined by the
10 |     \code{chunkSize} parameter. Care must be taken to not set \code{chunkSize}
11 |     too high to avoid memory shortage, particularly when combined with parallel
12 |     computing.
13 | }
14 | \seealso{
15 |     \code{\link[BEDMatrix]{BEDMatrix-class}} as an example of a file-backed
16 |     matrix.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/findRelated.Rd:
--------------------------------------------------------------------------------
 1 | \name{findRelated}
 2 | \alias{findRelated}
 3 | \alias{findRelated.matrix}
 4 | \alias{findRelated.symDMatrix}
 5 | \title{Find related individuals in a relationship matrix}
 6 | \description{
 7 |     Find related individuals in a relationship matrix.
 8 | }
 9 | \usage{
10 | findRelated(x, ...)
11 | 
12 | \method{findRelated}{matrix}(x, cutoff = 0.03, ...)
13 | 
14 | \method{findRelated}{symDMatrix}(x, cutoff = 0.03, verbose = FALSE,
15 |   ...)
16 | }
17 | \arguments{
18 |     \item{x}{
19 |         A matrix-like object with dimnames.
20 |     }
21 |     \item{...}{
22 |         Additional arguments for methods.
23 |     }
24 |     \item{cutoff}{
25 |         The cutoff between 0 and 1 for related individuals to be included in
26 |         the output. Defaults to 0.03.
27 |     }
28 |     \item{verbose}{
29 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
30 |     }
31 | }
32 | \value{
33 |     A vector of names or indices of related individuals.
34 | }
35 | \section{Methods (by class)}{
36 |     \itemize{
37 |         \item \code{matrix}: Find related individuals in matrices
38 |         \item \code{symDMatrix}: Find related individuals in symDMatrix objects
39 |     }
40 | }
41 | \examples{
42 | # Load example data
43 | bg <- BGData:::loadExample()
44 | 
45 | G <- getG(geno(bg))
46 | findRelated(G)
47 | }
48 | 


--------------------------------------------------------------------------------
/man/geno-class.Rd:
--------------------------------------------------------------------------------
 1 | \docType{class}
 2 | \name{geno-class}
 3 | \alias{geno-class}
 4 | \title{An Abstract S4 Class Union of Matrix-Like Types}
 5 | \description{
 6 |     \code{geno} is a class union of several matrix-like types, many of
 7 |     them suitable for very large datasets.
 8 | 
 9 |     Currently supported are \code{LinkedMatrix}, \code{BEDMatrix},
10 |     \code{big.matrix}, \code{ff_matrix}, and \code{matrix}.
11 | }
12 | \seealso{
13 |     \code{\link[LinkedMatrix]{LinkedMatrix-class}},
14 |     \code{\link[BEDMatrix]{BEDMatrix-class}},
15 |     \code{\link[bigmemory]{big.matrix-class}}, \code{\link[ff]{ff}}, and
16 |     \code{\link[base]{matrix}} for more information on each matrix-like type.
17 | 
18 |     \code{\link{BGData-class}} for more information on the \code{BGData} class,
19 |     in particular its \code{geno} accessor that accepts \code{geno} objects.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/geno.Rd:
--------------------------------------------------------------------------------
 1 | \name{geno}
 2 | \alias{geno}
 3 | \alias{geno<-}
 4 | \alias{pheno}
 5 | \alias{pheno<-}
 6 | \alias{map}
 7 | \alias{map<-}
 8 | \title{Getting/Setting Genotypes, Sample Information, and Variant Information}
 9 | \description{
10 |     A set of generic functions for getting/setting the genotypes, sample
11 |     information, and variant information.
12 | }
13 | \usage{
14 | geno(x)
15 | geno(x) <- value
16 | 
17 | pheno(x)
18 | pheno(x) <- value
19 | 
20 | map(x)
21 | map(x) <- value
22 | }
23 | \arguments{
24 |     \item{x}{
25 |         The object from/on which to get/set genotypes, sample information, and
26 |         variant information. Typically a \code{BGData} object.
27 |     }
28 |     \item{value}{
29 |         Typically a \code{geno} object for the \code{geno} setter.
30 | 
31 |         Typically a \code{data.frame} object for the \code{pheno} setter.
32 | 
33 |         Typically a \code{data.frame} object for the \code{map} setter.
34 |     }
35 | }
36 | \seealso{
37 |     \itemize{
38 |         \item \code{\link{BGData-class}}
39 |         \item \code{\link{geno-class}}
40 |     }
41 | }
42 | \examples{
43 | # Load example data
44 | bg <- BGData:::loadExample()
45 | 
46 | # Access genotypes
47 | geno(bg)
48 | 
49 | # Access sample information
50 | pheno(bg)
51 | 
52 | # Access variant information
53 | map(bg)
54 | }
55 | \keyword{methods}
56 | 


--------------------------------------------------------------------------------
/man/getG.Rd:
--------------------------------------------------------------------------------
  1 | \name{getG}
  2 | \alias{getG}
  3 | \title{Computes a Genomic Relationship Matrix}
  4 | \description{
  5 |     Computes a positive semi-definite symmetric genomic relationship matrix
  6 |     G=XX' offering options for centering and scaling the columns of \code{X}
  7 |     beforehand.
  8 | }
  9 | \usage{
 10 | getG(X, center = TRUE, scale = TRUE, impute = TRUE, scaleG = TRUE,
 11 |   minVar = 1e-05, i = seq_len(nrow(X)), j = seq_len(ncol(X)), i2 = NULL,
 12 |   chunkSize = 5000L, nCores = getOption("mc.cores", 2L), verbose = FALSE)
 13 | }
 14 | \arguments{
 15 |     \item{X}{
 16 |         A matrix-like object, typically the genotypes of a \code{BGData}
 17 |         object.
 18 |     }
 19 |     \item{center}{
 20 |         Either a logical value or a numeric vector of length equal to the
 21 |         number of columns of \code{X}. Numeric vector required if \code{i2} is
 22 |         used. If \code{FALSE}, no centering is done. Defaults to \code{TRUE}.
 23 |     }
 24 |     \item{scale}{
 25 |         Either a logical value or a numeric vector of length equal to the
 26 |         number of columns of \code{X}. Numeric vector required if \code{i2} is
 27 |         used. If \code{FALSE}, no scaling is done. Defaults to \code{TRUE}.
 28 |     }
 29 |     \item{impute}{
 30 |         Indicates whether missing values should be imputed. Defaults to
 31 |         \code{TRUE}.
 32 |     }
 33 |     \item{scaleG}{
 34 |         Whether XX' should be scaled. Defaults to \code{TRUE}.
 35 |     }
 36 |     \item{minVar}{
 37 |         Columns with variance lower than this value will not be used in the
 38 |         computation (only if \code{scale} is not \code{FALSE}).
 39 |     }
 40 |     \item{i}{
 41 |         Indicates which rows of \code{X} should be used. Can be integer,
 42 |         boolean, or character. By default, all rows are used.
 43 |     }
 44 |     \item{j}{
 45 |         Indicates which columns of \code{X} should be used. Can be integer,
 46 |         boolean, or character. By default, all columns are used.
 47 |     }
 48 |     \item{i2}{
 49 |         Indicates which rows should be used to compute a block of the genomic
 50 |         relationship matrix. Will compute XY' where X is determined by \code{i}
 51 |         and \code{j} and Y by \code{i2} and \code{j}. Can be integer, boolean,
 52 |         or character. If \code{NULL}, the whole genomic relationship matrix XX'
 53 |         is computed. Defaults to \code{NULL}.
 54 |     }
 55 |     \item{chunkSize}{
 56 |         The number of columns of \code{X} that are brought into physical memory
 57 |         for processing per core. If \code{NULL}, all columns of \code{X} are
 58 |         used. Defaults to 5000.
 59 |     }
 60 |     \item{nCores}{
 61 |         The number of cores (passed to \code{mclapply}). Defaults to the
 62 |         value of the \code{mc.cores} option, or 2 if that option is not set.
 63 |     }
 64 |     \item{verbose}{
 65 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
 66 |     }
 67 | }
 68 | \details{
 69 |     If \code{center = FALSE}, \code{scale = FALSE} and \code{scaleG = FALSE},
 70 |     \code{getG} produces the same outcome as \code{tcrossprod}.
 71 | }
 72 | \value{
 73 |     A positive semi-definite symmetric numeric matrix.
 74 | }
 75 | \seealso{
 76 |     \code{\link{file-backed-matrices}} for more information on file-backed
 77 |     matrices. \code{\link{multi-level-parallelism}} for more information on
 78 |     multi-level parallelism. \code{\link{BGData-class}} for more information on
 79 |     the \code{BGData} class.
 80 | }
 81 | \examples{
 82 | # Restrict number of cores to 1 on Windows
 83 | if (.Platform$OS.type == "windows") {
 84 |     options(mc.cores = 1)
 85 | }
 86 | 
 87 | # Load example data
 88 | bg <- BGData:::loadExample()
 89 | 
 90 | # Compute a scaled genomic relationship matrix from centered and scaled
 91 | # genotypes
 92 | g1 <- getG(X = geno(bg))
 93 | 
 94 | # Disable scaling of G
 95 | g2 <- getG(X = geno(bg), scaleG = FALSE)
 96 | 
 97 | # Disable centering of genotypes
 98 | g3 <- getG(X = geno(bg), center = FALSE)
 99 | 
100 | # Disable scaling of genotypes
101 | g4 <- getG(X = geno(bg), scale = FALSE)
102 | 
103 | # Provide own scales
104 | scales <- chunkedApply(X = geno(bg), MARGIN = 2, FUN = sd)
105 | g4 <- getG(X = geno(bg), scale = scales)
106 | 
107 | # Provide own centers
108 | centers <- chunkedApply(X = geno(bg), MARGIN = 2, FUN = mean)
109 | g5 <- getG(X = geno(bg), center = centers)
110 | 
111 | # Only use the first 50 individuals (useful to account for population structure)
112 | g6 <- getG(X = geno(bg), i = 1:50)
113 | 
114 | # Only use the first 100 markers (useful to ignore some markers)
115 | g7 <- getG(X = geno(bg), j = 1:100)
116 | 
117 | # Compute unscaled G matrix by combining blocks of $XX_{i2}'$ where $X_{i2}$ is
118 | # a horizontal partition of X. This is useful for distributed computing as each
119 | # block can be computed in parallel. Centers and scales need to be precomputed.
120 | block1 <- getG(X = geno(bg), i2 = 1:100, center = centers, scale = scales)
121 | block2 <- getG(X = geno(bg), i2 = 101:199, center = centers, scale = scales)
122 | g8 <- cbind(block1, block2)
123 | 
124 | # Compute unscaled G matrix by combining blocks of $X_{i}X_{i2}'$ where both
125 | # $X_{i}$ and $X_{i2}$ are horizontal partitions of X. Similarly to the example
126 | # above, this is useful for distributed computing, in particular to compute
127 | # very large G matrices. Centers and scales need to be precomputed. This
128 | # approach is similar to the one taken by the symDMatrix package, but the
129 | # symDMatrix package adds memory-mapped blocks, only stores the upper
130 | # triangle of the matrix, and provides a type that allows for indexing as if the
131 | # full G matrix is in memory.
132 | block11 <- getG(X = geno(bg), i = 1:100, i2 = 1:100, center = centers, scale = scales)
133 | block12 <- getG(X = geno(bg), i = 1:100, i2 = 101:199, center = centers, scale = scales)
134 | block21 <- getG(X = geno(bg), i = 101:199, i2 = 1:100, center = centers, scale = scales)
135 | block22 <- getG(X = geno(bg), i = 101:199, i2 = 101:199, center = centers, scale = scales)
136 | g9 <- rbind(
137 |     cbind(block11, block12),
138 |     cbind(block21, block22)
139 | )
140 | }
141 | 


--------------------------------------------------------------------------------
/man/getG_symDMatrix.Rd:
--------------------------------------------------------------------------------
 1 | \name{getG_symDMatrix}
 2 | \alias{getG_symDMatrix}
 3 | \title{Computes a Very Large Genomic Relationship Matrix}
 4 | \description{
 5 |     Computes a positive semi-definite symmetric genomic relationship matrix
 6 |     G=XX' offering options for centering and scaling the columns of \code{X}
 7 |     beforehand.
 8 | }
 9 | \usage{
10 | getG_symDMatrix(X, center = TRUE, scale = TRUE, impute = TRUE, scaleG = TRUE,
11 |   minVar = 1e-05, blockSize = 5000L,
12 |   folderOut = paste0("symDMatrix_", randomString()), vmode = "double",
13 |   i = seq_len(nrow(X)), j = seq_len(ncol(X)), chunkSize = 5000L,
14 |   nCores = getOption("mc.cores", 2L), verbose = FALSE)
15 | }
16 | \arguments{
17 |     \item{X}{
18 |         A matrix-like object, typically the genotypes of a \code{BGData}
19 |         object.
20 |     }
21 |     \item{center}{
22 |         Either a logical value or a numeric vector of length equal to the
23 |         number of columns of \code{X}. If \code{FALSE}, no centering is done.
24 |         Defaults to \code{TRUE}.
25 |     }
26 |     \item{scale}{
27 |         Either a logical value or a numeric vector of length equal to the
28 |         number of columns of \code{X}. If \code{FALSE}, no scaling is done.
29 |         Defaults to \code{TRUE}.
30 |     }
31 |     \item{impute}{
32 |         Indicates whether missing values should be imputed. Defaults to
33 |         \code{TRUE}.
34 |     }
35 |     \item{scaleG}{
36 |         Whether XX' should be scaled. Defaults to \code{TRUE}.
37 |     }
38 |     \item{minVar}{
39 |         Columns with variance lower than this value will not be used in the
40 |         computation (only if \code{scale} is not \code{FALSE}).
41 |     }
42 |     \item{blockSize}{
43 |         The number of rows and columns of each block. If \code{NULL}, a single
44 |         block of the same length as \code{i} will be created. Defaults to 5000.
45 |     }
46 |     \item{folderOut}{
47 |         The path to the folder where to save the \code{symDMatrix} object.
48 |         Defaults to a random string prefixed with "symDMatrix_".
49 |     }
50 |     \item{vmode}{
51 |         vmode of \code{ff} objects.
52 |     }
53 |     \item{i}{
54 |         Indicates which rows of \code{X} should be used. Can be integer,
55 |         boolean, or character. By default, all rows are used.
56 |     }
57 |     \item{j}{
58 |         Indicates which columns of \code{X} should be used. Can be integer,
59 |         boolean, or character. By default, all columns are used.
60 |     }
61 |     \item{chunkSize}{
62 |         The number of columns of \code{X} that are brought into physical memory
63 |         for processing per core. If \code{NULL}, all columns of \code{X} are
64 |         used. Defaults to 5000.
65 |     }
66 |     \item{nCores}{
67 |         The number of cores (passed to \code{mclapply}). Defaults to the
68 |         value of the \code{mc.cores} option, or 2 if that option is not set.
69 |     }
70 |     \item{verbose}{
71 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
72 |     }
73 | }
74 | \details{
75 |     Even very large genomic relationship matrices are supported by partitioning
76 |     \code{X} into blocks and calling \code{getG} on these blocks. This function
77 |     performs the block computations sequentially, which may be slow. In an HPC
78 |     environment, performance can be improved by manually distributing these
79 |     operations to different nodes.
80 | }
81 | \value{
82 |     A \code{symDMatrix} object.
83 | }
84 | \seealso{
85 |     \code{\link{multi-level-parallelism}} for more information on multi-level
86 |     parallelism. \code{\link[symDMatrix]{symDMatrix-class}} and
87 |     \code{\link{BGData-class}} for more information on these classes.
88 |     \code{\link{getG}} to learn more about the underlying method.
89 | }
90 | 


--------------------------------------------------------------------------------
/man/load.BGData.Rd:
--------------------------------------------------------------------------------
 1 | \name{load.BGData}
 2 | \alias{load.BGData}
 3 | \title{Loads BGData (and Other) Objects from .RData Files}
 4 | \description{
 5 |     This function is similar to \code{load}, but also initializes the different
 6 |     types of objects that can be used as genotypes in a \code{BGData} object.
 7 | 
 8 |     Currently supported are \code{ff_matrix}, \code{big.matrix}, and
 9 |     \code{BEDMatrix} objects. If the object is of type \code{LinkedMatrix}, all
10 |     nodes will be initialized with their appropriate method.
11 | }
12 | \usage{
13 | load.BGData(file, envir = parent.frame())
14 | }
15 | \arguments{
16 |     \item{file}{
17 |         The name of the .RData file to be loaded.
18 |     }
19 |     \item{envir}{
20 |         The environment where to load the data.
21 |     }
22 | }
23 | \seealso{
24 |     \code{\link{BGData-class}}, \code{\link[ff]{ff}},
25 |     \code{\link[bigmemory]{big.matrix-class}},
26 |     \code{\link[BEDMatrix]{BEDMatrix-class}}, and
27 |     \code{\link[LinkedMatrix]{LinkedMatrix-class}} for more information on the
28 |     above mentioned classes.
29 | }
30 | 


--------------------------------------------------------------------------------
/man/multi-level-parallelism.Rd:
--------------------------------------------------------------------------------
 1 | \name{multi-level-parallelism}
 2 | \alias{multi-level-parallelism}
 3 | \title{Multi-Level Parallelism}
 4 | \description{
 5 |     Functions with the \code{nCores}, \code{i}, and \code{j} parameters provide
 6 |     capabilities for both parallel and distributed computing.
 7 | 
 8 |     For parallel computing, \code{nCores} determines the number of cores the
 9 |     code is run on. Memory usage can be an issue for higher values of
10 |     \code{nCores} as R is not particularly memory-efficient. As a rule of
11 |     thumb, at least around \code{(nCores * object_size(chunk)) +
12 |     object_size(result)} MB of total memory will be needed for operations
13 |     on file-backed matrices, not including potential copies of your data that
14 |     might be created (for example \code{lsfit} runs \code{cbind(1, X)}).
15 |     \code{i} and \code{j} can be used to include or exclude certain rows or
16 |     columns. Internally, the \code{mclapply} function is used and therefore
17 |     parallel computing will not work on Windows machines.
18 | 
19 |     For distributed computing, \code{i} and \code{j} determine the subset of
20 |     the input matrix that the code runs on. In an HPC environment, this can be
21 |     used not just to include or exclude certain rows or columns, but also to
22 |     partition the task among many nodes rather than cores. Scheduler-specific
23 |     code and code to aggregate the results need to be written by the user. It
24 |     is recommended to set \code{nCores} to \code{1} as nodes are often cheaper
25 |     than cores.
26 | }
27 | \seealso{
28 |     \code{\link[parallel]{mclapply}} to learn more about the function used to
29 |     implement parallel computing. \code{\link[parallel]{detectCores}} to detect
30 |     the number of available cores.
31 | }
32 | 


--------------------------------------------------------------------------------
/man/orderedMerge.Rd:
--------------------------------------------------------------------------------
 1 | \name{orderedMerge}
 2 | \alias{orderedMerge}
 3 | \title{Merge Two Data Frames Keeping the Order of the First}
 4 | \description{
 5 |     This is a simplified version of \code{merge} useful for merging additional
 6 |     data into a \code{BGData} object while keeping the order of the data in the
 7 |     \code{BGData} object.
 8 | }
 9 | \usage{
10 | orderedMerge(x, y, by = c(1L, 2L))
11 | }
12 | \arguments{
13 |     \item{x}{
14 |         Data frame
15 |     }
16 |     \item{y}{
17 |         Data frame
18 |     }
19 |     \item{by}{
20 |         Specifications of the columns used for merging. Defaults to the first
21 |         two columns of the data frame, which traditionally has the family ID
22 |         and the individual ID.
23 |     }
24 | }
25 | \value{
26 |     Merged data frame
27 | }
28 | \seealso{
29 |     \code{\link{BGData-class}} for more information on the \code{BGData} class.
30 | }
31 | 


--------------------------------------------------------------------------------
/man/preprocess.Rd:
--------------------------------------------------------------------------------
 1 | \name{preprocess}
 2 | \alias{preprocess}
 3 | \title{Center, scale, and impute data}
 4 | \description{
 5 |     A faster version of \code{\link[base]{scale}} with a similar interface that
 6 |     also allows for imputation. The main difference is that this version scales
 7 |     by the standard deviation regardless of whether centering is enabled or
 8 |     not. If centering is enabled, missing values are imputed by 0, otherwise by
 9 |     the mean of the column that contains the value.
10 | }
11 | \usage{
12 | preprocess(X, center = FALSE, scale = FALSE, impute = FALSE,
13 |   nCores = getOption("mc.cores", 2L))
14 | }
15 | \arguments{
16 |     \item{X}{
17 |         A numeric matrix.
18 |     }
19 |     \item{center}{
20 |         Either a logical value or numeric vector of length equal to the number
21 |         of columns of \code{X}.
22 |     }
23 |     \item{scale}{
24 |         Either a logical value or numeric vector of length equal to the number
25 |         of columns of \code{X}.
26 |     }
27 |     \item{impute}{
28 |         Indicates whether missing values should be imputed.
29 |     }
30 |     \item{nCores}{
31 |         The number of cores (passed to \code{mclapply}). Defaults to the
32 |         value of the \code{mc.cores} option, or 2 if that option is not set.
33 |     }
34 | }
35 | \value{
36 |     The centered, scaled, and imputed matrix.
37 | }
38 | \seealso{
39 |     \code{\link[base]{scale}}, which this function tries to improve upon.
40 | }
41 | \examples{
42 | # Load example data
43 | bg <- BGData:::loadExample()
44 | 
45 | # Center and scale genotypes
46 | W <- preprocess(as.matrix(geno(bg)), center = TRUE, scale = TRUE)
47 | }
48 | 


--------------------------------------------------------------------------------
/man/readRAW.Rd:
--------------------------------------------------------------------------------
  1 | \name{readRAW}
  2 | \alias{readRAW}
  3 | \alias{readRAW_matrix}
  4 | \alias{readRAW_big.matrix}
  5 | \title{Creates a BGData Object From a .raw File or a .ped-Like File}
  6 | \description{
  7 |     Creates a \code{BGData} object from a .raw file (generated with
  8 |     \code{--recodeA} in \href{https://www.cog-genomics.org/plink2}{PLINK}).
  9 |     Other text-based file formats are supported as well by tweaking some of the
 10 |     parameters as long as the records of individuals are in rows, and
 11 |     phenotypes, covariates and markers are in columns.
 12 | }
 13 | \usage{
 14 | readRAW(fileIn, header = TRUE, dataType = integer(), n = NULL,
 15 |   p = NULL, sep = "", na.strings = "NA", nColSkip = 6L,
 16 |   idCol = c(1L, 2L), nNodes = NULL, linked.by = "rows",
 17 |   folderOut = paste0("BGData_", sub("\\\\.[[:alnum:]]+$", "",
 18 |   basename(fileIn))), outputType = "byte", dimorder = if (linked.by ==
 19 |   "rows") 2L:1L else 1L:2L, verbose = FALSE)
 20 | 
 21 | readRAW_matrix(fileIn, header = TRUE, dataType = integer(), n = NULL,
 22 |   p = NULL, sep = "", na.strings = "NA", nColSkip = 6L,
 23 |   idCol = c(1L, 2L), verbose = FALSE)
 24 | 
 25 | readRAW_big.matrix(fileIn, header = TRUE, dataType = integer(),
 26 |   n = NULL, p = NULL, sep = "", na.strings = "NA", nColSkip = 6L,
 27 |   idCol = c(1L, 2L), folderOut = paste0("BGData_",
 28 |   sub("\\\\.[[:alnum:]]+$", "", basename(fileIn))), outputType = "char",
 29 |   verbose = FALSE)
 30 | }
 31 | \arguments{
 32 |     \item{fileIn}{
 33 |         The path to the plaintext file.
 34 |     }
 35 |     \item{header}{
 36 |         Whether \code{fileIn} contains a header. Defaults to \code{TRUE}.
 37 |     }
 38 |     \item{dataType}{
 39 |         The coding type of genotypes in \code{fileIn}. Use \code{integer()} or
 40 |         \code{double()} for numeric coding. Alpha-numeric coding is currently
 41 |         not supported for \code{readRAW} and \code{readRAW_big.matrix}: use the
 42 |         \code{--recodeA} option of PLINK to convert the .ped file into a .raw
 43 |         file. Defaults to \code{integer()}.
 44 |     }
 45 |     \item{n}{
 46 |         The number of individuals. Auto-detect if \code{NULL}. Defaults to
 47 |         \code{NULL}.
 48 |     }
 49 |     \item{p}{
 50 |         The number of markers. Auto-detect if \code{NULL}. Defaults to
 51 |         \code{NULL}.
 52 |     }
 53 |     \item{sep}{
 54 |         The field separator character. Values on each line of the file are
 55 |         separated by this character. If \code{sep = ""} (the default for
 56 |         \code{readRAW}) the separator is "white space", that is one or more
 57 |         spaces, tabs, newlines or carriage returns.
 58 |     }
 59 |     \item{na.strings}{
 60 |         The character string used in the plaintext file to denote missing
 61 |         values. Defaults to \code{NA}.
 62 |     }
 63 |     \item{nColSkip}{
 64 |         The number of columns to be skipped to reach the genotype information
 65 |         in the file. Defaults to \code{6}.
 66 |     }
 67 |     \item{idCol}{
 68 |         The index of the ID column. If more than one index is given, both
 69 |         columns will be concatenated with "_". Defaults to \code{c(1, 2)}, i.e.
 70 |         a concatenation of the first two columns.
 71 |     }
 72 |     \item{nNodes}{
 73 |         The number of nodes to create. Auto-detect if \code{NULL}. Defaults to
 74 |         \code{NULL}.
 75 |     }
 76 |     \item{linked.by}{
 77 |         If \code{columns} a column-linked matrix (\code{ColumnLinkedMatrix}) is
 78 |         created, if \code{rows} a row-linked matrix (\code{RowLinkedMatrix}).
 79 |         Defaults to \code{rows}.
 80 |     }
 81 |     \item{folderOut}{
 82 |         The path to the folder where to save the binary files. Defaults to the
 83 |         name of the input file (\code{fileIn}) without extension prefixed with
 84 |         "BGData_".
 85 |     }
 86 |     \item{outputType}{
 87 |         The \code{vmode} for \code{ff} and \code{type} for \code{big.matrix}
 88 |         objects. Defaults to \code{byte} for \code{ff} and \code{char} for
 89 |         \code{big.matrix} objects.
 90 |     }
 91 |     \item{dimorder}{
 92 |         The physical layout of the underlying \code{ff} object of each node.
 93 |     }
 94 |     \item{verbose}{
 95 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
 96 |     }
 97 | }
 98 | \details{
 99 |     The data included in the first couple of columns (up to \code{nColSkip}) is
100 |     used to populate the sample information of a \code{BGData} object, and the
101 |     remaining columns are used to fill the genotypes. If the first row contains
102 |     a header (\code{header = TRUE}), data in this row is used to determine the
103 |     column names for sample information and genotypes.
104 | 
105 |     The genotypes can take several forms, depending on the function that is
106 |     called (\code{readRAW}, \code{readRAW_matrix}, or
107 |     \code{readRAW_big.matrix}). The following sections illustrate each function
108 |     in detail.
109 | }
110 | \section{readRAW}{
111 |     Genotypes are stored in a \code{LinkedMatrix} object where each node is an
112 |     \code{ff} instance. Multiple \code{ff} files are used because the array
113 |     size in \code{ff} is limited to the largest integer which can be
114 |     represented on the system (\code{.Machine$integer.max}) and for genetic
115 |     data this limitation is often exceeded. The \code{LinkedMatrix} package
116 |     makes it possible to link several \code{ff} files together by columns or by
117 |     rows and treat them similarly to a single matrix. By default a
118 |     \code{RowLinkedMatrix} is used for the genotypes, but the user can
119 |     modify this using the \code{linked.by} argument. The number of nodes to
120 |     generate is either specified by the user using the \code{nNodes} argument
121 |     or determined internally so that each \code{ff} object has a number of
122 |     cells that is smaller than \code{.Machine$integer.max / 1.2}. A folder (see
123 |     \code{folderOut}) that contains the binary flat files (named
124 |     \code{geno_*.bin}) and an external representation of the \code{BGData}
125 |     object in \code{BGData.RData} is created.
126 | }
127 | \section{readRAW_matrix}{
128 |     Genotypes are stored in a regular \code{matrix} object. Therefore, this
129 |     function will only work if the .raw file is small enough to fit into
130 |     memory.
131 | }
132 | \section{readRAW_big.matrix}{
133 |     Genotypes are stored in a filebacked \code{big.matrix} object. A folder
134 |     (see \code{folderOut}) that contains the binary flat file (named
135 |     \code{BGData.bin}), a descriptor file (named \code{BGData.desc}), and an
136 |     external representation of the \code{BGData} object in \code{BGData.RData}
137 |     are created.
138 | }
139 | \section{Reloading a BGData object}{
140 |     To reload a \code{BGData} object, it is recommended to use the
141 |     \code{load.BGData} function instead of the \code{load} function as
142 |     \code{load} does not initialize \code{ff} objects or attach
143 |     \code{big.matrix} objects.
144 | }
145 | \seealso{
146 |     \code{\link[=load.BGData]{load.BGData()}} to load a previously saved
147 |     \code{BGData} object, \code{\link[=as.BGData]{as.BGData()}} to create
148 |     \code{BGData} objects from non-text files (e.g. .bed files).
149 |     \code{\link{BGData-class}},
150 |     \code{\link[LinkedMatrix]{ColumnLinkedMatrix-class}},
151 |     \code{\link[LinkedMatrix]{RowLinkedMatrix-class}},
152 |     \code{\link[bigmemory]{big.matrix-class}}, and \code{\link[ff]{ff}} for
153 |     more information on the above mentioned classes.
154 | }
155 | \examples{
156 | # Path to example data
157 | path <- system.file("extdata", package = "BGData")
158 | 
159 | # Convert RAW files of chromosome 1 to a BGData object
160 | bg <- readRAW(fileIn = paste0(path, "/chr1.raw"))
161 | 
162 | unlink("BGData_chr1", recursive = TRUE)
163 | }
164 | 


--------------------------------------------------------------------------------
/man/segments.Rd:
--------------------------------------------------------------------------------
 1 | \name{segments}
 2 | \alias{segments}
 3 | \title{Find non-overlapping segments based on a summary statistic}
 4 | \description{
 5 |     Given a summary statistic and a threshold, this function computes the
 6 |     number of non-overlapping segments, each defined as a discovery (i.e.,
 7 |     \code{statistic[i] <= threshold}) +/- a gap, in the same units as \code{bp}
 8 |     (often base-pair position).
 9 | }
10 | \usage{
11 | segments(statistic, chr, bp, threshold, gap, trim = FALSE, verbose = FALSE)
12 | }
13 | \arguments{
14 |     \item{statistic}{
15 |         A statistic (e.g., BFDR or p-values).
16 |     }
17 |     \item{chr}{
18 |         A vector containing the chromosome for each value of \code{statistic}.
19 |     }
20 |     \item{bp}{
21 |         A vector containing the base-pair positions for each value of
22 |         \code{statistic}.
23 |     }
24 |     \item{threshold}{
25 |         The threshold to determine 'significance' (e.g., \code{1e-5} for
26 |         p-values).
27 |     }
28 |     \item{gap}{
29 |         1/2 of the length of the desired segments.
30 |     }
31 |     \item{trim}{
32 |         Whether to collapse segments that were artificially inflated by
33 |         \code{gap}. Defaults to \code{FALSE}.
34 |     }
35 |     \item{verbose}{
36 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
37 |     }
38 | }
39 | \value{
40 |     A data frame containing the following information:
41 |     \item{chr}{
42 |         Chromosome
43 |     }
44 |     \item{start}{
45 |         Index where segment starts within \code{statistic}.
46 |     }
47 |     \item{end}{
48 |         Index where segment ends within \code{statistic}.
49 |     }
50 |     \item{length}{
51 |         Length of segment.
52 |     }
53 |     \item{bpStart}{
54 |         Base-pair position where segment starts.
55 |     }
56 |     \item{bpEnd}{
57 |         Base-pair position where segment ends.
58 |     }
59 |     \item{bpLength}{
60 |         Length of segment in base-pair positions.
61 |     }
62 |     \item{minValue}{
63 |         Smallest value of \code{statistic} within segment.
64 |     }
65 |     \item{minValuePos}{
66 |         Position of variant with the smallest value of \code{statistic} within
67 |         segment.
68 |     }
69 | }
70 | \examples{
71 | library(BGData)
72 | 
73 | # Load example data
74 | bg <- BGData:::loadExample()
75 | 
76 | # Perform GWAS
77 | pValues <- GWAS(
78 |     formula = FT10 ~ 1,
79 |     data = bg,
80 |     method = "rayOLS"
81 | )
82 | 
83 | # Determine segments within +/- 1MB from a significant variant
84 | segments <- segments(
85 |     statistic = pValues[, 4],
86 |     chr = map(bg)$chromosome,
87 |     bp = map(bg)$base_pair_position,
88 |     threshold = 1e-5,
89 |     gap = 1e6,
90 |     trim = FALSE,
91 |     verbose = FALSE
92 | )
93 | }
94 | 


--------------------------------------------------------------------------------
/man/summarize.Rd:
--------------------------------------------------------------------------------
 1 | \name{summarize}
 2 | \alias{summarize}
 3 | \title{Generates Various Summary Statistics}
 4 | \description{
 5 |     Computes the frequency of missing values, the (minor) allele frequency, and
 6 |     standard deviation of each column of \code{X}.
 7 | }
 8 | \usage{
 9 | summarize(X, i = seq_len(nrow(X)), j = seq_len(ncol(X)),
10 |   chunkSize = 5000L, nCores = getOption("mc.cores", 2L),
11 |   verbose = FALSE)
12 | }
13 | \arguments{
14 |     \item{X}{
15 |         A matrix-like object, typically the genotypes of a \code{BGData}
16 |         object.
17 |     }
18 |     \item{i}{
19 |         Indicates which rows of \code{X} should be used. Can be integer,
20 |         boolean, or character. By default, all rows are used.
21 |     }
22 |     \item{j}{
23 |         Indicates which columns of \code{X} should be used. Can be integer,
24 |         boolean, or character. By default, all columns are used.
25 |     }
26 |     \item{chunkSize}{
27 |         The number of columns of \code{X} that are brought into physical memory
28 |         for processing per core. If \code{NULL}, all elements in \code{j} are
29 |         used. Defaults to 5000.
30 |     }
31 |     \item{nCores}{
32 |         The number of cores (passed to \code{mclapply}). Defaults to the
33 |         value of the \code{mc.cores} option, or \code{2} if it is not set.
34 |     }
35 |     \item{verbose}{
36 |         Whether progress updates will be posted. Defaults to \code{FALSE}.
37 |     }
38 | }
39 | \value{
40 |     A \code{data.frame} with three columns: \code{freq_na} for frequencies of
41 |     missing values, \code{allele_freq} for allele frequencies of the counted
42 |     allele, and \code{sd} for standard deviations.
43 | }
44 | \seealso{
45 |     \code{\link{file-backed-matrices}} for more information on file-backed
46 |     matrices. \code{\link{multi-level-parallelism}} for more information on
47 |     multi-level parallelism. \code{\link{BGData-class}} for more information on
48 |     the \code{BGData} class.
49 | }
50 | \examples{
51 | # Restrict number of cores to 1 on Windows
52 | if (.Platform$OS.type == "windows") {
53 |     options(mc.cores = 1)
54 | }
55 | 
56 | # Load example data
57 | bg <- BGData:::loadExample()
58 | 
59 | # Summarize the whole dataset
60 | sum1 <- summarize(X = geno(bg))
61 | 
62 | # Summarize the first 50 individuals
63 | sum2 <- summarize(X = geno(bg), i = 1:50)
64 | 
65 | # Summarize the first 100 markers (useful for distributed computing)
66 | sum3 <- summarize(X = geno(bg), j = 1:100)
67 | 
68 | # Summarize the first 50 individuals on the first 100 markers
69 | sum4 <- summarize(X = geno(bg), i = 1:50, j = 1:100)
70 | 
71 | # Summarize by names
72 | sum5 <- summarize(X = geno(bg), j = c("snp81233_C", "snp81234_C", "snp81235_T"))
73 | 
74 | # Convert to minor allele frequencies (useful if the counted alleles are not
75 | # the minor alleles)
76 | maf <- ifelse(sum1$allele_freq > 0.5, 1 - sum1$allele_freq, sum1$allele_freq)
77 | }
78 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS)
2 | PKG_LIBS = $(SHLIB_OPENMP_CFLAGS)
3 | 


--------------------------------------------------------------------------------
/src/fitLSYS.c:
--------------------------------------------------------------------------------
 1 | #include "fitLSYS.h"
 2 | 
 3 | SEXP fitLSYS(SEXP C, SEXP rhs, SEXP b, SEXP active, SEXP RSS, SEXP maxIter, SEXP tolerance) {
 4 |     // Iteratively solves C b = rhs for the coefficients listed in `active`, updating one
 5 |     // coefficient at a time (entries of `active` are used directly as 0-based C offsets).
 6 |     // Works on a copy of b and returns list(updated b, updated RSS). Stops after maxIter
 7 |     // sweeps or once the relative drop in RSS falls below tolerance.
 8 |     R_xlen_t p = Rf_ncols(C); // R_xlen_t so that the offset p * k + n cannot overflow int
 9 |     R_xlen_t nActive = Rf_xlength(active);
10 |     int nIter = Rf_asInteger(maxIter);
11 |     double tol = Rf_asReal(tolerance);
12 |     double *pC = REAL(C), *prhs = REAL(rhs);
13 |     int *pactive = INTEGER(active);
14 |     b = PROTECT(Rf_duplicate(b)); // update a copy, the caller's b stays untouched
15 |     double *pb = REAL(b);
16 |     double newRSS = Rf_asReal(RSS), oldRSS = newRSS;
17 |     for (int iter = 0; iter < nIter; iter++) {
18 |         oldRSS = newRSS;
19 |         for (R_xlen_t j = 0; j < nActive; j++) { // loop over active predictors
20 |             R_xlen_t k = pactive[j];
21 |             double *Ck = pC + p * k; // start of column k (C is addressed as a p x p column-major matrix)
22 |             double Ckk = Ck[k];
23 |             double offset = 0; // sum of C[n, k] * b[n] over the active n other than k
24 |             for (R_xlen_t m = 0; m < nActive; m++) {
25 |                 R_xlen_t n = pactive[m];
26 |                 offset += Ck[n] * pb[n];
27 |             }
28 |             offset -= Ckk * pb[k]; // take the diagonal term back out of the sum
29 |             double rhs_offset = prhs[k] - offset;
30 |             double sol = rhs_offset / Ckk;
31 |             newRSS += (pow(sol, 2) - pow(pb[k], 2)) * Ckk - 2 * (sol - pb[k]) * rhs_offset;
32 |             pb[k] = sol;
33 |         }
34 |         if (((oldRSS - newRSS) / oldRSS) < tol) break; // relative RSS change below tolerance
35 |     }
36 |     SEXP list = PROTECT(Rf_allocVector(VECSXP, 2)); // list(b, RSS)
37 |     SET_VECTOR_ELT(list, 0, b);
38 |     SET_VECTOR_ELT(list, 1, Rf_ScalarReal(newRSS));
39 |     UNPROTECT(2); // b, list
40 |     return list;
41 | }
42 | 


--------------------------------------------------------------------------------
/src/fitLSYS.h:
--------------------------------------------------------------------------------
1 | #define R_NO_REMAP
2 | 
3 | #include <Rinternals.h>
4 | 
5 | SEXP fitLSYS(SEXP C, SEXP rhs, SEXP b, SEXP active, SEXP RSS, SEXP maxIter, SEXP tolerance);
6 | 


--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
 1 | #include "summarize.h"
 2 | #include "rayOLS.h"
 3 | #include "preprocess.h"
 4 | #include "fitLSYS.h"
 5 | 
 6 | #include <R_ext/Rdynload.h>
 7 | 
 8 | static const R_CallMethodDef callMethods[] = { // .Call routine table: {name, function pointer, number of arguments}
 9 |     {"summarize", (DL_FUNC) &summarize, 1},
10 |     {"rayOLS", (DL_FUNC) &rayOLS, 2},
11 |     {"preprocess", (DL_FUNC) &preprocess, 5},
12 |     {"fitLSYS", (DL_FUNC) &fitLSYS, 7},
13 |     {NULL, NULL, 0} // sentinel entry that terminates the table
14 | };
15 | 
16 | void R_init_BGData(DllInfo *dll) { // initialization hook: registers the native routines of this library
17 |     R_registerRoutines(dll, NULL, callMethods, NULL, NULL); // only the .Call table is supplied
18 |     R_useDynamicSymbols(dll, FALSE); // no dynamic lookup of symbols that are not registered
19 |     R_forceSymbols(dll, TRUE); // routines must be referenced as symbol objects, not by character name
20 | }
21 | 


--------------------------------------------------------------------------------
/src/preprocess.c:
--------------------------------------------------------------------------------
  1 | #include "preprocess.h"
  2 | 
  3 | #ifdef _OPENMP
  4 | #include <omp.h>
  5 | #endif
  6 | #include <stddef.h>
  7 | 
  8 | void preprocess_int(int *in, int nrows, int ncols, double *out, int center, double *centers, int computeCenters, int scale, double *scales, int computeScales, int impute, int numCores) {
  9 |     // Column-wise centering/scaling/imputation of an integer matrix (column-major, nrows x ncols)
 10 |     // into `out`. Column statistics skip NA_INTEGER cells and are stored in `centers`/`scales`
 11 |     // only when computeCenters/computeScales are set; otherwise the supplied values are applied.
 12 |     #pragma omp parallel for schedule(static) default(none) shared(NA_INTEGER, NA_REAL, in, nrows, ncols, out, center, centers, computeCenters, scale, scales, computeScales, impute) num_threads(numCores)
 13 |     for (ptrdiff_t j = 0; j < ncols; j++) {
 14 |         double mean;
 15 |         if (computeCenters || computeScales || impute) {
 16 |             double sum = 0;
 17 |             double sumsq = 0;
 18 |             ptrdiff_t n = 0;
 19 |             for (ptrdiff_t i = 0; i < nrows; i++) {
 20 |                 int *cin = in + j * nrows + i;
 21 |                 if (*cin != NA_INTEGER) {
 22 |                     double value = *cin; // widen before squaring: int * int overflows for |x| > 46340
 23 |                     sum += value;
 24 |                     sumsq += value * value;
 25 |                     n++;
 26 |                 }
 27 |             }
 28 |             mean = sum / n;
 29 |             if (computeCenters) {
 30 |                 centers[j] = mean;
 31 |             }
 32 |             if (computeScales) {
 33 |                 scales[j] = sqrt((sumsq - (sum * sum) / n) / (n - 1)); // sample standard deviation
 34 |             }
 35 |         }
 36 |         for (ptrdiff_t i = 0; i < nrows; i++) {
 37 |             int *cin = in + j * nrows + i;
 38 |             double *cout = out + j * nrows + i;
 39 |             if (*cin == NA_INTEGER) {
 40 |                 if (impute) {
 41 |                     *cout = center ? 0 : mean; // imputed value: 0 when centering, else the column mean
 42 |                 } else {
 43 |                     *cout = NA_REAL;
 44 |                 }
 45 |             } else {
 46 |                 *cout = *cin;
 47 |                 if (center) {
 48 |                     *cout -= centers[j];
 49 |                 }
 50 |                 if (scale) {
 51 |                     *cout /= scales[j];
 52 |                 }
 53 |             }
 54 |         }
 55 |     }
 56 | }
 57 | 
 58 | void preprocess_real(double *in, int nrows, int ncols, double *out, int center, double *centers, int computeCenters, int scale, double *scales, int computeScales, int impute, int numCores) {
 59 |     // Column-wise centering/scaling/imputation of a double matrix (column-major, nrows x ncols)
 60 |     // into `out`. NA and NaN cells are both treated as missing (ISNAN) in the statistics pass
 61 |     // and in the output pass, so a cell that is left out of the column mean is also the one
 62 |     // that gets imputed; without imputation missing cells are copied through unchanged.
 63 |     #pragma omp parallel for schedule(static) default(none) shared(NA_REAL, in, nrows, ncols, out, center, centers, computeCenters, scale, scales, computeScales, impute) num_threads(numCores)
 64 |     for (ptrdiff_t j = 0; j < ncols; j++) {
 65 |         double mean;
 66 |         if (computeCenters || computeScales || impute) {
 67 |             double sum = 0;
 68 |             double sumsq = 0;
 69 |             ptrdiff_t n = 0;
 70 |             for (ptrdiff_t i = 0; i < nrows; i++) {
 71 |                 double *cin = in + j * nrows + i;
 72 |                 if (!ISNAN(*cin)) {
 73 |                     sum += *cin;
 74 |                     sumsq += *cin * *cin;
 75 |                     n++;
 76 |                 }
 77 |             }
 78 |             mean = sum / n;
 79 |             if (computeCenters) {
 80 |                 centers[j] = mean;
 81 |             }
 82 |             if (computeScales) {
 83 |                 scales[j] = sqrt((sumsq - (sum * sum) / n) / (n - 1)); // sample standard deviation
 84 |             }
 85 |         }
 86 |         for (ptrdiff_t i = 0; i < nrows; i++) {
 87 |             double *cin = in + j * nrows + i;
 88 |             double *cout = out + j * nrows + i;
 89 |             *cout = *cin;
 90 |             if (ISNAN(*cin)) { // same missingness test as the statistics pass above
 91 |                 if (impute) {
 92 |                     *cout = center ? 0 : mean; // imputed value: 0 when centering, else the column mean
 93 |                 }
 94 |             } else {
 95 |                 if (center) {
 96 |                     *cout -= centers[j];
 97 |                 }
 98 |                 if (scale) {
 99 |                     *cout /= scales[j];
100 |                 }
101 |             }
102 |         }
103 |     }
104 | }
105 | 
106 | SEXP preprocess(SEXP sIn, SEXP sCenter, SEXP sScale, SEXP sImpute, SEXP sNumCores) {
107 |     // .Call entry point. Returns a REALSXP carrying the attributes of sIn and the
108 |     // centered/scaled/imputed values of sIn (an integer or double matrix).
109 |     // sCenter, sScale: logical (TRUE = compute per column from the data, FALSE =
110 |     //                  skip) or a double vector with one value per column that is
111 |     //                  applied as is; any other type is ignored (treated like FALSE).
112 |     // sImpute:         logical, fill missing cells instead of leaving them missing.
113 |     // sNumCores:       integer, number of OpenMP threads to use.
114 |     // The centers/scales that were applied are attached to the result as the
115 |     // "scaled:center" and "scaled:scale" attributes.
116 |     int nprotect = 0;
117 |     // Reject unsupported input types up front: the output vector is allocated
118 |     // uninitialized and would otherwise be returned without being filled.
119 |     if (TYPEOF(sIn) != REALSXP && TYPEOF(sIn) != INTSXP) {
120 |         Rf_error("X needs to be a numeric matrix");
121 |     }
122 |     R_xlen_t length = Rf_xlength(sIn);
123 |     int nrows = Rf_nrows(sIn);
124 |     int ncols = Rf_ncols(sIn);
125 |     int center = 0;
126 |     SEXP sCenters = NULL;
127 |     double *centers = NULL;
128 |     int computeCenters = 0;
129 |     switch(TYPEOF(sCenter)) {
130 |     case LGLSXP:
131 |         center = Rf_asLogical(sCenter);
132 |         if (center) {
133 |             sCenters = PROTECT(Rf_allocVector(REALSXP, ncols));
134 |             nprotect++;
135 |             centers = REAL(sCenters);
136 |             computeCenters = 1;
137 |         }
138 |         break;
139 |     case REALSXP:
140 |         // Supplied centers are read once per column, so the length has to match
141 |         if (Rf_xlength(sCenter) != ncols) {
142 |             Rf_error("The length of center and the number of columns in X need to match");
143 |         }
144 |         center = 1;
145 |         sCenters = PROTECT(Rf_duplicate(sCenter));
146 |         nprotect++;
147 |         centers = REAL(sCenters);
148 |         break;
149 |     }
150 |     int scale = 0;
151 |     SEXP sScales = NULL;
152 |     double *scales = NULL;
153 |     int computeScales = 0;
154 |     switch(TYPEOF(sScale)) {
155 |     case LGLSXP:
156 |         scale = Rf_asLogical(sScale);
157 |         if (scale) {
158 |             sScales = PROTECT(Rf_allocVector(REALSXP, ncols));
159 |             nprotect++;
160 |             scales = REAL(sScales);
161 |             computeScales = 1;
162 |         }
163 |         break;
164 |     case REALSXP:
165 |         // Supplied scales are read once per column, so the length has to match
166 |         if (Rf_xlength(sScale) != ncols) {
167 |             Rf_error("The length of scale and the number of columns in X need to match");
168 |         }
169 |         scale = 1;
170 |         sScales = PROTECT(Rf_duplicate(sScale));
171 |         nprotect++;
172 |         scales = REAL(sScales);
173 |         break;
174 |     }
175 |     int impute = Rf_asLogical(sImpute);
176 |     int numCores = Rf_asInteger(sNumCores);
177 |     // Allocate output vector
178 |     SEXP sOut = PROTECT(Rf_allocVector(REALSXP, length));
179 |     nprotect++;
180 |     // Dispatch on the storage type of the input (restricted to two types above)
181 |     if (TYPEOF(sIn) == REALSXP) {
182 |         preprocess_real(
183 |             REAL(sIn), nrows, ncols, REAL(sOut),
184 |             center, centers, computeCenters,
185 |             scale, scales, computeScales,
186 |             impute, numCores
187 |         );
188 |     } else {
189 |         preprocess_int(
190 |             INTEGER(sIn), nrows, ncols, REAL(sOut),
191 |             center, centers, computeCenters,
192 |             scale, scales, computeScales,
193 |             impute, numCores
194 |         );
195 |     }
196 |     // Handle attributes
197 |     DUPLICATE_ATTRIB(sOut, sIn);
198 |     if (center) Rf_setAttrib(sOut, Rf_install("scaled:center"), sCenters);
199 |     if (scale) Rf_setAttrib(sOut, Rf_install("scaled:scale"), sScales);
200 |     UNPROTECT(nprotect);
201 |     return sOut;
202 | }
203 | 


--------------------------------------------------------------------------------
/src/preprocess.h:
--------------------------------------------------------------------------------
1 | #define R_NO_REMAP
2 | 
3 | #include <Rinternals.h>
4 | 
5 | SEXP preprocess(SEXP sIn, SEXP sCenter, SEXP sScale, SEXP sImpute, SEXP sNumCores);
6 | 


--------------------------------------------------------------------------------
/src/rayOLS.c:
--------------------------------------------------------------------------------
  1 | #include "rayOLS.h"
  2 | 
  3 | #include <Rmath.h>
  4 | 
  5 | SEXP rayOLS_real(SEXP X, SEXP y) {
  6 |     // Simple linear regression of y on each column of a double matrix X.
  7 |     // Returns an ncol(X) x 6 double matrix; per column of X the output columns
  8 |     // hold: slope, its standard error, the test statistic (slope / se), the
  9 |     // two-sided p-value from a t distribution with n - 2 degrees of freedom, the
 10 |     // number of rows used (n), and sum(x) / n / 2 (reported as allele frequency).
 11 |     // Rows where x or y is NA (ISNA) are left out of all sums.
 12 |     const int nrow = Rf_nrows(X);
 13 |     const int ncol = Rf_ncols(X);
 14 |     if (nrow != Rf_xlength(y)) {
 15 |         Rf_error("The number of rows in X and the length of y need to match\n");
 16 |     }
 17 |     // One output row per column of X
 18 |     SEXP out = PROTECT(Rf_allocMatrix(REALSXP, ncol, 6));
 19 |     double *res = REAL(out);
 20 |     // Data pointers
 21 |     const double *x = REAL(X);
 22 |     const double *resp = REAL(y);
 23 |     for (R_xlen_t col = 0; col < ncol; col++) {
 24 |         const double *x_col = x + col * nrow;
 25 |         // Accumulators over the rows that are complete in both x and y
 26 |         R_xlen_t n = 0;     // number of complete rows
 27 |         double sum_x = 0;   // sum of x
 28 |         double sum_y = 0;   // sum of y
 29 |         double sum_xy = 0;  // sum of x * y, centered below
 30 |         double sum_xx = 0;  // sum of x^2, centered below
 31 |         double sum_yy = 0;  // sum of y^2, centered below
 32 |         for (R_xlen_t row = 0; row < nrow; row++) {
 33 |             const double xi = x_col[row];
 34 |             const double yi = resp[row];
 35 |             if (ISNA(xi) || ISNA(yi)) {
 36 |                 continue;
 37 |             }
 38 |             n++;
 39 |             sum_x += xi;
 40 |             sum_y += yi;
 41 |             sum_xy += xi * yi;
 42 |             sum_xx += xi * xi;
 43 |             sum_yy += yi * yi;
 44 |         }
 45 |         // Turn the raw sums into centered cross-products
 46 |         sum_xy -= (sum_x * sum_y) / n;
 47 |         sum_xx -= (sum_x * sum_x) / n;
 48 |         sum_yy -= (sum_y * sum_y) / n;
 49 |         // Slope of the regression line: Cov(x, y) / Var(x)
 50 |         const double slope = sum_xy / sum_xx;
 51 |         // Residual sum of squares and the statistics derived from it
 52 |         const double rss = sum_yy - (sum_xx * pow(slope, 2));
 53 |         const double se = sqrt((rss / (n - 2)) / sum_xx);
 54 |         const double t_stat = slope / se;
 55 |         const double p_value = Rf_pt(fabs(t_stat), n - 2, 0, 0) * 2;
 56 |         const double allele_freq = sum_x / n / 2;
 57 |         // Fill row `col` of the column-major result
 58 |         res[col] = slope;
 59 |         res[col + ncol] = se;
 60 |         res[col + (2 * ncol)] = t_stat;
 61 |         res[col + (3 * ncol)] = p_value;
 62 |         res[col + (4 * ncol)] = n;
 63 |         res[col + (5 * ncol)] = allele_freq;
 64 |     }
 65 |     UNPROTECT(1);
 66 |     return out;
 67 | }
 68 | 
 69 | SEXP rayOLS_integer(SEXP X, SEXP y) {
 70 |     // Simple linear regression of y on each column of an integer matrix X.
 71 |     // Returns an ncol(X) x 6 double matrix with, per column of X: slope,
 72 |     // standard error, test statistic, two-sided p-value (t distribution,
 73 |     // n - 2 degrees of freedom), number of rows used, and sum(x) / n / 2.
 74 |     // Rows where x is NA_INTEGER or y is NA are skipped.
 75 |     int X_nrow = Rf_nrows(X);
 76 |     int X_ncol = Rf_ncols(X);
 77 |     // Check if dimensions match
 78 |     R_xlen_t y_length = Rf_xlength(y);
 79 |     if (X_nrow != y_length) {
 80 |         Rf_error("The number of rows in X and the length of y need to match\n");
 81 |     }
 82 |     // Allocate output matrix and get data pointers
 83 |     SEXP out = PROTECT(Rf_allocMatrix(REALSXP, X_ncol, 6));
 84 |     double *out_data = REAL(out);
 85 |     int *X_data = INTEGER(X);
 86 |     double *y_data = REAL(y);
 87 |     // Iterate over columns of X
 88 |     for (R_xlen_t col_idx = 0; col_idx < X_ncol; col_idx++) {
 89 |         // Sums over the rows that are non-missing in both x and y
 90 |         R_xlen_t n = 0; // number of such rows
 91 |         double xt1 = 0; // sum of x
 92 |         double yt1 = 0; // sum of y
 93 |         double xty = 0; // sum of x * y, for Cov(x, y)
 94 |         double xtx = 0; // sum of x^2, for Var(x)
 95 |         double yty = 0; // sum of y^2, for RSS
 96 |         for (R_xlen_t row_idx = 0; row_idx < X_nrow; row_idx++) {
 97 |             int x_raw = X_data[row_idx + (col_idx * X_nrow)];
 98 |             if (!(x_raw == NA_INTEGER || ISNA(y_data[row_idx]))) {
 99 |                 // Widen before squaring: int * int overflows for |x| > 46340
100 |                 double x_val = x_raw;
101 |                 n++;
102 |                 xt1 += x_val;
103 |                 yt1 += y_data[row_idx];
104 |                 xty += x_val * y_data[row_idx];
105 |                 xtx += x_val * x_val;
106 |                 yty += y_data[row_idx] * y_data[row_idx];
107 |             }
108 |         }
109 |         // Center xty, xtx, and yty
110 |         xty -= (xt1 * yt1) / n;
111 |         xtx -= (xt1 * xt1) / n;
112 |         yty -= (yt1 * yt1) / n;
113 |         // Compute beta_1 as Cov(x, y) / Var(x)
114 |         double beta_1 = xty / xtx;
115 |         // Compute remaining statistics
116 |         double rss = yty - (xtx * pow(beta_1, 2));
117 |         double se = sqrt((rss / (n - 2)) / xtx);
118 |         double z_stat = beta_1 / se;
119 |         double p_value = Rf_pt(fabs(z_stat), n - 2, 0, 0) * 2;
120 |         double allele_freq = xt1 / n / 2;
121 |         // Write results
122 |         out_data[col_idx] = beta_1;
123 |         out_data[col_idx + X_ncol] = se;
124 |         out_data[col_idx + (2 * X_ncol)] = z_stat;
125 |         out_data[col_idx + (3 * X_ncol)] = p_value;
126 |         out_data[col_idx + (4 * X_ncol)] = n;
127 |         out_data[col_idx + (5 * X_ncol)] = allele_freq;
128 |     }
129 |     UNPROTECT(1);
130 |     return out;
131 | }
132 | 
133 | SEXP rayOLS(SEXP X, SEXP y) {
134 |     // .Call entry point: picks the implementation that matches the storage
135 |     // type of X (double or integer); every other type is rejected.
136 |     // TODO: Macro-based generics
137 |     const int x_type = TYPEOF(X);
138 |     if (x_type == REALSXP) {
139 |         return rayOLS_real(X, y);
140 |     }
141 |     if (x_type == INTSXP) {
142 |         return rayOLS_integer(X, y);
143 |     }
144 |     // Rf_error() does not return; the return below only keeps compilers quiet
145 |     Rf_error("x needs to be a numeric vector");
146 |     return R_NilValue;
147 | }
148 | 


--------------------------------------------------------------------------------
/src/rayOLS.h:
--------------------------------------------------------------------------------
1 | #define R_NO_REMAP
2 | 
3 | #include <Rinternals.h>
4 | 
5 | SEXP rayOLS(SEXP X, SEXP y);
6 | 


--------------------------------------------------------------------------------
/src/summarize.c:
--------------------------------------------------------------------------------
  1 | #include "summarize.h"
  2 | 
  3 | #include <Rmath.h>
  4 | 
  5 | SEXP summarize_real(SEXP X) {
  6 |     // Per-column summary of a double matrix X. Returns an ncol(X) x 3 double
  7 |     // matrix: proportion of NA entries, sum(x) / n / 2 over the non-NA entries
  8 |     // (reported as allele frequency), and their sample standard deviation.
  9 |     // A column without any non-NA entry yields 1, NA, NA.
 10 |     int nrow = Rf_nrows(X);
 11 |     int ncol = Rf_ncols(X);
 12 |     double *X_data = REAL(X);
 13 |     // Allocate output matrix
 14 |     SEXP out = PROTECT(Rf_allocMatrix(REALSXP, ncol, 3));
 15 |     double *out_data = REAL(out);
 16 |     // Iterate over columns of X; the indices are R_xlen_t because the offset
 17 |     // col_idx * nrow exceeds INT_MAX once X has more than 2^31 - 1 cells
 18 |     for (R_xlen_t col_idx = 0; col_idx < ncol; col_idx++) {
 19 |         // n: non-missing count, xt1: column sum, xtx: column sum of squares
 20 |         R_xlen_t n = 0;
 21 |         double xt1 = 0;
 22 |         double xtx = 0;
 23 |         for (R_xlen_t row_idx = 0; row_idx < nrow; row_idx++) {
 24 |             double x_val = X_data[row_idx + (col_idx * nrow)];
 25 |             if (!ISNA(x_val)) {
 26 |                 n++;
 27 |                 xt1 += x_val;
 28 |                 xtx += x_val * x_val;
 29 |             }
 30 |         }
 31 |         double freq_na;
 32 |         double allele_freq;
 33 |         double sd;
 34 |         if (n) {
 35 |             // Center xtx
 36 |             xtx -= (xt1 * xt1) / n;
 37 |             // Compute summary statistics
 38 |             freq_na = (nrow - n) / (double) nrow;
 39 |             allele_freq = xt1 / n / 2;
 40 |             sd = sqrt(xtx / (n - 1));
 41 |         } else {
 42 |             freq_na = 1;
 43 |             allele_freq = NA_REAL;
 44 |             sd = NA_REAL;
 45 |         }
 46 |         // Write results into output matrix
 47 |         out_data[col_idx] = freq_na;
 48 |         out_data[col_idx + ncol] = allele_freq;
 49 |         out_data[col_idx + (2 * ncol)] = sd;
 50 |     }
 51 |     UNPROTECT(1);
 52 |     return out;
 53 | }
 54 | 
 55 | SEXP summarize_integer(SEXP X) {
 56 |     // Per-column summary of an integer matrix X. Returns an ncol(X) x 3 double
 57 |     // matrix: proportion of NA_INTEGER entries, sum(x) / n / 2 over the other
 58 |     // entries (reported as allele frequency), and their sample standard
 59 |     // deviation. A column with only NA_INTEGER entries yields 1, NA, NA.
 60 |     int nrow = Rf_nrows(X);
 61 |     int ncol = Rf_ncols(X);
 62 |     int *X_data = INTEGER(X);
 63 |     // Allocate output matrix
 64 |     SEXP out = PROTECT(Rf_allocMatrix(REALSXP, ncol, 3));
 65 |     double *out_data = REAL(out);
 66 |     // Iterate over columns of X; the indices are R_xlen_t because the offset
 67 |     // col_idx * nrow exceeds INT_MAX once X has more than 2^31 - 1 cells
 68 |     for (R_xlen_t col_idx = 0; col_idx < ncol; col_idx++) {
 69 |         // n: non-missing count, xt1: column sum, xtx: column sum of squares
 70 |         R_xlen_t n = 0;
 71 |         double xt1 = 0;
 72 |         double xtx = 0;
 73 |         for (R_xlen_t row_idx = 0; row_idx < nrow; row_idx++) {
 74 |             int x_raw = X_data[row_idx + (col_idx * nrow)];
 75 |             if (x_raw != NA_INTEGER) {
 76 |                 double x_val = x_raw; // widen before squaring: int * int overflows for |x| > 46340
 77 |                 n++;
 78 |                 xt1 += x_val;
 79 |                 xtx += x_val * x_val;
 80 |             }
 81 |         }
 82 |         double freq_na;
 83 |         double allele_freq;
 84 |         double sd;
 85 |         if (n) {
 86 |             xtx -= (xt1 * xt1) / n; // center xtx
 87 |             // Compute summary statistics
 88 |             freq_na = (nrow - n) / (double) nrow;
 89 |             allele_freq = xt1 / n / 2;
 90 |             sd = sqrt(xtx / (n - 1));
 91 |         } else {
 92 |             freq_na = 1;
 93 |             allele_freq = NA_REAL;
 94 |             sd = NA_REAL;
 95 |         }
 96 |         // Write results into output matrix
 97 |         out_data[col_idx] = freq_na;
 98 |         out_data[col_idx + ncol] = allele_freq;
 99 |         out_data[col_idx + (2 * ncol)] = sd;
100 |     }
101 |     UNPROTECT(1);
102 |     return out;
103 | }
104 | 
105 | // Summarize a numeric matrix X by dispatching on its storage type to the
106 | // real or integer implementation; any other type is rejected with an error.
107 | // TODO: Macro-based generics
108 | SEXP summarize(SEXP X) {
109 |     // The storage type of X decides which implementation runs
110 |     int x_type = TYPEOF(X);
111 |     if (x_type == REALSXP) {
112 |         return summarize_real(X);
113 |     }
114 |     if (x_type == INTSXP) {
115 |         return summarize_integer(X);
116 |     }
117 |     // Neither double nor integer storage
118 |     Rf_error("X needs to be a numeric matrix");
119 | }
120 | 


--------------------------------------------------------------------------------
/src/summarize.h:
--------------------------------------------------------------------------------
1 | #define R_NO_REMAP
2 | // R_NO_REMAP is defined ahead of the include below so that Rinternals.h sees it
3 | #include <Rinternals.h>
4 | // Summarize the matrix X; the definition dispatches on TYPEOF(X)
5 | SEXP summarize(SEXP X);
6 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(BGData)
3 | # Run the testthat tests of the BGData package
4 | test_check("BGData")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/helper-utils.R:
--------------------------------------------------------------------------------
 1 | library(parallel)
 2 | # Path (with trailing slash) of a folder in tempdir() named via BGData:::randomString()
 3 | testDir <- function() {
 4 |     paste0(tempdir(), "/", paste0("BGData-", BGData:::randomString()), "/")
 5 | }
 6 | # Skip the current test unless `numCores` cores can be used.
 7 | hasCores <- function(numCores) {
 8 |     # For CRAN; detectCores() can return NA, so an unknown count is treated as one core
 9 |     available <- max(parallel::detectCores(), 1L, na.rm = TRUE)
10 |     if (identical(Sys.getenv("_R_CHECK_LIMIT_CORES_"), "TRUE") || numCores > available) {
11 |         skip("Not enough cores or number of cores capped for CRAN submission checks.")
12 |     }
13 |     if (.Platform$OS.type == "windows" && numCores > 1) { # For WinBuilder
14 |         skip("mc.cores > 1 is not supported on Windows.")
15 |     }
16 | }
17 | 


--------------------------------------------------------------------------------
/tests/testthat/test-BGData.R:
--------------------------------------------------------------------------------
  1 | context("BGData")
  2 | # testPath is the parent folder for the files and folders created in this file
  3 | # Create dummy path
  4 | testPath <- paste0(tempdir(), "/BGData-", BGData:::randomString(), "/")
  5 | dir.create(testPath)
  6 | # Rebuild the reference nRows x nCols genotype matrix (values drawn with seed 4711)
  7 | restoreGenotypes <- function() {
  8 |     set.seed(4711)
  9 |     values <- sample(c(1, 2, 3, 4), size = nRows * nCols, replace = TRUE)
 10 |     # Re-initialize the RNG so the fixed seed does not carry over to later code
 11 |     set.seed(NULL)
 12 |     rowIds <- paste0("1_", seq_len(nRows))
 13 |     colIds <- paste0("mrk_", seq_len(nCols))
 14 |     matrix(data = values, nrow = nRows, ncol = nCols, dimnames = list(rowIds, colIds))
 15 | }
 16 | 
 17 | # Create the example .raw file: phenotype columns followed by genotype columns
 18 | nRows <- 3
 19 | nCols <- 3
 20 | pedPath <- paste0(testPath, "ped-", BGData:::randomString(), ".txt")
 21 | phenotypes <- data.frame(FID = rep("1", 3), IID = as.character(1:3), PAT = rep("NA", 3),
 22 |     MAT = rep("NA", 3), SEX = rep("NA", 3), PHENOTYPE = rep("NA", 3), stringsAsFactors = FALSE)
 23 | # type.convert() turns the all-"NA" columns into NA and the ID columns into integers
 24 | phenotypes[] <- lapply(phenotypes, type.convert, as.is = TRUE)
 25 | rownames(phenotypes) <- paste0("1_", seq_len(nRows))
 26 | genotypes <- restoreGenotypes()
 27 | ped <- cbind(phenotypes, genotypes)
 28 | outFile <- file(pedPath, "w")
 29 | write.table(ped, file = outFile, quote = FALSE, row.names = FALSE)
 30 | close(outFile)
 31 | 
 32 | 
 33 | context("initialize")
 34 | # Constructor validation: each BGData() call below is expected to fail
 35 | test_that("it requires at least geno", {
 36 |     expect_error(BGData())
 37 | })
 38 | # pheno is passed as a character vector (the row names), not a data.frame
 39 | test_that("it checks if pheno is a data.frame", {
 40 |     expect_error(BGData(geno = genotypes, pheno = rownames(genotypes)))
 41 | })
 42 | # map is passed as a character vector (the column names), not a data.frame
 43 | test_that("it checks if map is a data.frame", {
 44 |     expect_error(BGData(geno = genotypes, map = colnames(genotypes)))
 45 | })
 46 | # Dropping the first row leaves pheno one row short of geno
 47 | test_that("it checks if the number of rows of geno match with the number of rows of pheno", {
 48 |     expect_error(BGData(geno = genotypes, pheno = phenotypes[-1, ]))
 49 | })
 50 | # drop = FALSE keeps the one-column map a data.frame, so only its row count is wrong
 51 | test_that("it checks if the number of columns of geno match with the number of rows of map", {
 52 |     map <- data.frame(mrk = colnames(genotypes))
 53 |     expect_error(BGData(geno = genotypes, map = map[-1, , drop = FALSE]))
 54 | })
 55 | # BGData() is expected to fail when geno has duplicated row or column names
 56 | test_that("it checks if the rownames of geno are unique", {
 57 |     rownames(genotypes) <- c("1_1", "1_2", "1_2")
 58 |     expect_error(BGData(geno = genotypes))
 59 |     genotypes <- restoreGenotypes()
 60 | })
 61 | # NOTE(review): the restoreGenotypes() assignments in these two tests presumably only affect a copy local to the test - confirm
 62 | test_that("it checks if the colnames of geno are unique", {
 63 |     colnames(genotypes) <- c("mrk_1", "mrk_2", "mrk_2")
 64 |     expect_error(BGData(geno = genotypes))
 65 |     genotypes <- restoreGenotypes()
 66 | })
 67 | # Names of pheno/map that do not line up with geno are expected to produce a warning
 68 | test_that("it warns if the row names of pheno do not match the row names of geno", {
 69 |     expect_warning(BGData(geno = genotypes, pheno = phenotypes[nrow(phenotypes):1, ]))
 70 | })
 71 | # NOTE(review): map gets default row names here, so they differ from colnames(genotypes) even without rev() - confirm intent
 72 | test_that("it warns if the row names of map do not match the columns names of geno", {
 73 |     map <- data.frame(mrk = rev(colnames(genotypes)))
 74 |     expect_warning(BGData(geno = genotypes, map = map))
 75 | })
 76 | # Without row names on geno, pheno is expected to get sample_<n> row names
 77 | test_that("it generates a sequence as rownames for pheno if geno does not have rownames", {
 78 |     rownames(genotypes) <- NULL
 79 |     DATA <- BGData(geno = genotypes)
 80 |     expect_equal(rownames(pheno(DATA)), paste0("sample_", seq_len(nrow(pheno(DATA)))))
 81 |     genotypes <- restoreGenotypes()
 82 | })
 83 | # Without column names on geno, map is expected to get variant_<n> row names
 84 | test_that("it generates a sequence as rownames for map if geno does not have colnames", {
 85 |     colnames(genotypes) <- NULL
 86 |     DATA <- BGData(geno = genotypes)
 87 |     expect_equal(rownames(map(DATA)), paste0("variant_", seq_len(nrow(map(DATA)))))
 88 |     genotypes <- restoreGenotypes()
 89 | })
 90 | 
 91 | 
 92 | context("readRAW")
 93 | 
 94 | test_that("it complains if folderOut already exists", {
 95 |     dirExistsPath <- paste0(testPath, "dirExists")
 96 |     dir.create(dirExistsPath, showWarnings = FALSE)
 97 |     expect_error(readRAW(fileIn = pedPath, n = nRows, folderOut = dirExistsPath))
 98 | })
 99 | 
100 | 
101 | test_that("it reads .raw files into BGData objects", {
102 | 
103 |     # With minimum number of parameters (with exception of folderOut)
104 |     BGData <- readRAW(fileIn = pedPath, folderOut = paste0(testPath, "test-", BGData:::randomString()))
105 |     expect_equal(pheno(BGData), phenotypes)
106 |     expect_equivalent(geno(BGData)[], genotypes)
107 | 
108 |     # With n
109 |     BGData <- readRAW(fileIn = pedPath, n = nRows, folderOut = paste0(testPath, "test-", BGData:::randomString()))
110 |     expect_equal(pheno(BGData), phenotypes)
111 |     expect_equivalent(geno(BGData)[], genotypes)
112 | 
113 |     # With p
114 |     BGData <- readRAW(fileIn = pedPath, p = nCols, folderOut = paste0(testPath, "test-", BGData:::randomString()))
115 |     expect_equal(pheno(BGData), phenotypes)
116 |     expect_equivalent(geno(BGData)[], genotypes)
117 | 
118 |     # With both n and p
119 |     BGData <- readRAW(fileIn = pedPath, n = nRows, p = nCols, folderOut = paste0(testPath, "test-", BGData:::randomString()))
120 |     expect_equal(pheno(BGData), phenotypes)
121 |     expect_equivalent(geno(BGData)[], genotypes)
122 | 
123 |     # As integer
124 |     class(genotypes) <- "integer"
125 |     BGData <- readRAW(fileIn = pedPath, dataType = integer(), folderOut = paste0(testPath, "test-", BGData:::randomString()))
126 |     expect_equivalent(geno(BGData)[], genotypes)
127 |     BGData <- readRAW(fileIn = pedPath, dataType = "integer", folderOut = paste0(testPath, "test-", BGData:::randomString()))
128 |     expect_equivalent(geno(BGData)[], genotypes)
129 |     genotypes <- restoreGenotypes()
130 | 
131 |     # As double
132 |     class(genotypes) <- "double"
133 |     BGData <- readRAW(fileIn = pedPath, dataType = double(), folderOut = paste0(testPath, "test-", BGData:::randomString()))
134 |     expect_equivalent(geno(BGData)[], genotypes)
135 |     BGData <- readRAW(fileIn = pedPath, dataType = "double", folderOut = paste0(testPath, "test-", BGData:::randomString()))
136 |     expect_equivalent(geno(BGData)[], genotypes)
137 |     genotypes <- restoreGenotypes()
138 | 
139 |     # As character
140 |     expect_error(readRAW(fileIn = pedPath, dataType = character(), folderOut = paste0(testPath, "test-", BGData:::randomString())))
141 |     expect_error(readRAW(fileIn = pedPath, dataType = "character", folderOut = paste0(testPath, "test-", BGData:::randomString())))
142 | 
143 | })
144 | 
145 | 
146 | context("readRAW_matrix")
147 | 
148 | test_that("it reads a .raw file into a matrix object", {
149 | 
150 |     # With minimum number of parameters (with exception of folderOut)
151 |     BGData <- readRAW_matrix(fileIn = pedPath)
152 |     expect_equal(pheno(BGData), phenotypes)
153 |     expect_equal(geno(BGData)[], genotypes)
154 | 
155 |     # With n
156 |     BGData <- readRAW_matrix(fileIn = pedPath, n = nRows)
157 |     expect_equal(pheno(BGData), phenotypes)
158 |     expect_equal(geno(BGData)[], genotypes)
159 | 
160 |     # With p
161 |     BGData <- readRAW_matrix(fileIn = pedPath, p = nCols)
162 |     expect_equal(pheno(BGData), phenotypes)
163 |     expect_equal(geno(BGData)[], genotypes)
164 | 
165 |     # With both n and p
166 |     BGData <- readRAW_matrix(fileIn = pedPath, n = nRows, p = nCols)
167 |     expect_equal(pheno(BGData), phenotypes)
168 |     expect_equal(geno(BGData)[], genotypes)
169 | 
170 |     # As integer
171 |     class(genotypes) <- "integer"
172 |     BGData <- readRAW_matrix(fileIn = pedPath, dataType = integer())
173 |     expect_equal(geno(BGData)[], genotypes)
174 |     BGData <- readRAW_matrix(fileIn = pedPath, dataType = "integer")
175 |     expect_equal(geno(BGData)[], genotypes)
176 |     genotypes <- restoreGenotypes()
177 | 
178 |     # As double
179 |     class(genotypes) <- "double"
180 |     BGData <- readRAW_matrix(fileIn = pedPath, dataType = double())
181 |     expect_equal(geno(BGData)[], genotypes)
182 |     BGData <- readRAW_matrix(fileIn = pedPath, dataType = "double")
183 |     expect_equal(geno(BGData)[], genotypes)
184 |     genotypes <- restoreGenotypes()
185 | 
186 |     # As character
187 |     class(genotypes) <- "character"
188 |     BGData <- readRAW_matrix(fileIn = pedPath, dataType = character())
189 |     expect_equal(geno(BGData)[], genotypes)
190 |     BGData <- readRAW_matrix(fileIn = pedPath, dataType = "character")
191 |     expect_equal(geno(BGData)[], genotypes)
192 |     genotypes <- restoreGenotypes()
193 | 
194 | })
195 | 
196 | context("readRAW_big.matrix")
197 | 
198 | test_that("it reads a .raw file into a big.matrix object", {
199 |     # Every call below gets its own randomly named folderOut under testPath
200 |     # With minimum number of parameters (with exception of folderOut)
201 |     BGData <- readRAW_big.matrix(fileIn = pedPath, folderOut = paste0(testPath, "test-", BGData:::randomString()))
202 |     expect_equal(pheno(BGData), phenotypes)
203 |     expect_equal(geno(BGData)[], genotypes)
204 | 
205 |     # With n
206 |     BGData <- readRAW_big.matrix(fileIn = pedPath, n = nRows, folderOut = paste0(testPath, "test-", BGData:::randomString()))
207 |     expect_equal(pheno(BGData), phenotypes)
208 |     expect_equal(geno(BGData)[], genotypes)
209 | 
210 |     # With p
211 |     BGData <- readRAW_big.matrix(fileIn = pedPath, p = nCols, folderOut = paste0(testPath, "test-", BGData:::randomString()))
212 |     expect_equal(pheno(BGData), phenotypes)
213 |     expect_equal(geno(BGData)[], genotypes)
214 | 
215 |     # With both n and p
216 |     BGData <- readRAW_big.matrix(fileIn = pedPath, n = nRows, p = nCols, folderOut = paste0(testPath, "test-", BGData:::randomString()))
217 |     expect_equal(pheno(BGData), phenotypes)
218 |     expect_equal(geno(BGData)[], genotypes)
219 | 
220 |     # As integer
221 |     class(genotypes) <- "integer"
222 |     BGData <- readRAW_big.matrix(fileIn = pedPath, dataType = integer(), folderOut = paste0(testPath, "test-", BGData:::randomString()))
223 |     expect_equal(geno(BGData)[], genotypes)
224 |     BGData <- readRAW_big.matrix(fileIn = pedPath, dataType = "integer", folderOut = paste0(testPath, "test-", BGData:::randomString()))
225 |     expect_equal(geno(BGData)[], genotypes)
226 |     genotypes <- restoreGenotypes()
227 | 
228 |     # As double
229 |     class(genotypes) <- "double"
230 |     BGData <- readRAW_big.matrix(fileIn = pedPath, dataType = double(), folderOut = paste0(testPath, "test-", BGData:::randomString()))
231 |     expect_equal(geno(BGData)[], genotypes)
232 |     BGData <- readRAW_big.matrix(fileIn = pedPath, dataType = "double", folderOut = paste0(testPath, "test-", BGData:::randomString()))
233 |     expect_equal(geno(BGData)[], genotypes)
234 |     genotypes <- restoreGenotypes()
235 | 
236 |     # As character: the function under test (not readRAW) is expected to fail
237 |     expect_error(readRAW_big.matrix(fileIn = pedPath, dataType = character(), folderOut = paste0(testPath, "test-", BGData:::randomString())))
238 |     expect_error(readRAW_big.matrix(fileIn = pedPath, dataType = "character", folderOut = paste0(testPath, "test-", BGData:::randomString())))
239 | 
240 | })
241 | 
242 | context("load.BGData")
243 | 
244 | test_that("it loads BGData objects created by readRAW", {
245 | 
246 |     # Create dummy BGData object without returning data
247 |     path <- paste0(testPath, "test-", BGData:::randomString())
248 |     readRAW(fileIn = pedPath, folderOut = path)
249 |     expect_true(!("BGData" %in% ls()))
250 | 
251 |     # Append BGData.RData to path
252 |     path <- paste0(path, "/", "BGData.RData")
253 | 
254 |     # Load BGData object and test if all nodes have been opened
255 |     load.BGData(path)
256 |     expect_true("BGData" %in% ls())
257 |     for (node in seq_len(LinkedMatrix::nNodes(geno(BGData)))) {
258 |         expect_true(ff::is.open(geno(BGData)[[node]]))
259 |     }
260 |     expect_equal(dim(geno(BGData)), c(nRows, nCols))
261 | 
262 | })
263 | 
264 | test_that("it loads BGData objects created by readRAW_matrix", {
265 | 
266 |     # Create dummy BGData object
267 |     path <- paste0(testPath, "test-", BGData:::randomString(), "/", "BGData.RData")
268 |     dir.create(dirname(path))
269 |     BGData <- readRAW_matrix(fileIn = pedPath)
270 |     save(BGData, file = path)
271 |     rm(BGData)
272 |     expect_true(!("BGData" %in% ls()))
273 | 
274 |     # Load BGData object
275 |     load.BGData(path)
276 |     expect_true("BGData" %in% ls())
277 |     expect_equal(dim(geno(BGData)), c(nRows, nCols))
278 | 
279 | })
280 | 
281 | test_that("it loads BGData objects created by readRAW_big.matrix", {
282 | 
283 |     # Create dummy BGData object
284 |     path <- paste0(testPath, "test-", BGData:::randomString())
285 |     readRAW_big.matrix(fileIn = pedPath, dataType = integer(), folderOut = path)
286 |     expect_true(!("BGData" %in% ls()))
287 | 
288 |     # Append BGData.RData to path
289 |     path <- paste0(path, "/", "BGData.RData")
290 | 
291 |     # Load BGData object
292 |     load.BGData(path)
293 |     expect_true("BGData" %in% ls())
294 |     expect_equal(dim(geno(BGData)), c(nRows, nCols))
295 | 
296 | })
297 | 
298 | test_that("it loads BGData objects containing a BEDMatrix object", {
299 | 
300 |     # Create dummy objects
301 |     bedMatrix <- BEDMatrix::BEDMatrix(system.file("extdata", "chr1.bed", package = "BGData"))
302 |     bedDims <- dim(bedMatrix)
303 |     bedDNames <- dimnames(bedMatrix)
304 |     bedRow <- bedMatrix[1, ]
305 |     BGData <- BGData(geno = bedMatrix)
306 | 
307 |     # Save BGData object
308 |     path <- paste0(testPath, "test-", BGData:::randomString(), "/", "BGData.RData")
309 |     dir.create(dirname(path))
310 |     save(BGData, file = path)
311 |     rm(BGData)
312 |     expect_true(!("BGData" %in% ls()))
313 | 
314 |     # Load BGData object
315 |     load.BGData(path)
316 |     expect_true("BGData" %in% ls())
317 |     expect_equal(dim(geno(BGData)), bedDims)
318 |     expect_equal(dimnames(geno(BGData)), bedDNames)
319 |     expect_equal(geno(BGData)[1, ], bedRow)
320 | 
321 | })
322 | 
323 | context("as.BGData")
324 | 
325 | test_that("it converts a regular BEDMatrix object to a BGData object", {
326 |     bedMatrix <- BEDMatrix::BEDMatrix(system.file("extdata", "chr1.bed", package = "BGData"))
327 |     bgData <- as.BGData(bedMatrix)
328 |     expect_is(bgData, "BGData")
329 |     expect_equal(dim(geno(bgData)), dim(bedMatrix))
330 |     expect_equal(nrow(pheno(bgData)), nrow(bedMatrix))
331 |     expect_equal(rownames(pheno(bgData)), rownames(bedMatrix))
332 |     expect_equal(nrow(map(bgData)), ncol(bedMatrix))
333 |     expect_equal(rownames(map(bgData)), colnames(bedMatrix))
334 | })
335 | 
336 | test_that("it converts a BEDMatrix object created with the n parameter to a BGData object", {
337 |     bedMatrix <- BEDMatrix::BEDMatrix(system.file("extdata", "chr1.bed", package = "BGData"), n = 199)
338 |     bgData <- as.BGData(bedMatrix)
339 |     expect_is(bgData, "BGData")
340 |     expect_equal(dim(geno(bgData)), dim(bedMatrix))
341 |     expect_equal(nrow(pheno(bgData)), nrow(bedMatrix))
342 |     expect_equal(nrow(map(bgData)), ncol(bedMatrix))
343 |     expect_equal(rownames(map(bgData)), colnames(bedMatrix))
344 | })
345 | 
346 | test_that("it converts a BEDMatrix object created with the p parameter to a BGData object", {
347 |     bedMatrix <- BEDMatrix::BEDMatrix(system.file("extdata", "chr1.bed", package = "BGData"), p = 300)
348 |     bgData <- as.BGData(bedMatrix)
349 |     expect_is(bgData, "BGData")
350 |     expect_equal(dim(geno(bgData)), dim(bedMatrix))
351 |     expect_equal(nrow(pheno(bgData)), nrow(bedMatrix))
352 |     expect_equal(rownames(pheno(bgData)), rownames(bedMatrix))
353 |     expect_equal(nrow(map(bgData)), ncol(bedMatrix))
354 | })
355 | 
356 | test_that("it converts a BEDMatrix object created with the n and p parameters to a BGData object", {
357 |     bedMatrix <- BEDMatrix::BEDMatrix(system.file("extdata", "chr1.bed", package = "BGData"), n = 199, p = 300)
358 |     bgData <- as.BGData(bedMatrix)
359 |     expect_is(bgData, "BGData")
360 |     expect_equal(dim(geno(bgData)), dim(bedMatrix))
361 |     expect_equal(nrow(pheno(bgData)), nrow(bedMatrix))
362 |     expect_equal(nrow(map(bgData)), ncol(bedMatrix))
363 | })
364 | 
365 | test_that("it throws an error if an alternate phenotype file does not exist when converting a BEDMatrix object to a BGData object", {
366 |     bedMatrix <- BEDMatrix::BEDMatrix(system.file("extdata", "chr1.bed", package = "BGData"))
367 |     expect_error(as.BGData(bedMatrix, alternatePhenotypeFile = "NOT_FOUND"))
368 | })
369 | 
370 | test_that("it reads an alternate phenotype file when converting a BEDMatrix object to a BGData object", {
371 |     bedMatrix <- BEDMatrix::BEDMatrix(system.file("extdata", "chr1.bed", package = "BGData"))
372 |     bgData <- as.BGData(bedMatrix, alternatePhenotypeFile = system.file("extdata", "pheno.txt", package = "BGData"))
373 |     expect_is(bgData, "BGData")
374 |     # Test if pheno has an extra column for the phenotype
375 |     expect_equal(ncol(pheno(bgData)), 7)
376 |     # Test merging and NA handling
377 |     expect_equal(pheno(bgData)[1, 7], 57.0)
378 |     expect_equal(nrow(pheno(bgData)), nrow(geno(bgData)))
379 |     expect_true(all(is.na(pheno(bgData)[c(178, 180, 189, 190, 196), 7])))
380 |     # Test if rownames are retained
381 |     expect_equal(rownames(pheno(bgData)), rownames(bedMatrix))
382 | })
383 | 


--------------------------------------------------------------------------------
/tests/testthat/test-GWAS.R:
--------------------------------------------------------------------------------
  1 | context("GWAS")
  2 | 
  3 | set.seed(1)
  4 | 
  5 | nRows <- 15
  6 | nCols <- 50
  7 | percentNA <- 0.1
  8 | # Reference GWAS: regress y on each column of X (plus covariates) with lm()
  9 | lm_test <- function(X, y, covariates = NULL) {
 10 |     # Coefficient row of the marker: row 2, right after the intercept
 11 |     fitMarker <- function(x) {
 12 |         data <- data.frame(y = y, x = x)
 13 |         if (!is.null(covariates)) {
 14 |             data <- cbind(data, covariates)
 15 |         }
 16 |         fm <- lm(y ~ ., data = data)
 17 |         coefficients(summary(fm))[2, ]
 18 |     }
 19 |     # apply() returns one column per marker; transpose to one row per marker
 20 |     perMarker <- apply(X, 2, fitMarker)
 21 |     res <- t(perMarker)
 22 |     rownames(res) <- colnames(X)
 23 |     res
 24 | }
 25 | # Reference GWAS via lsfit(): second coefficient row (the marker) per column of X
 26 | lsfit_test <- function(X, y, covariates = NULL) {
 27 |     fitMarker <- function(x) {
 28 |         fm <- lsfit(x = cbind(x, covariates), y = y)
 29 |         ls.print(fm, print.it = FALSE)$coef.table[[1]][2, ]
 30 |     }
 31 |     res <- t(apply(X, 2, fitMarker))
 32 |     rownames(res) <- colnames(X)
 33 |     res
 34 | }
 35 | 
 36 | test_that("GWAS without covariates", {
 37 | 
 38 |     for (mode in c("integer", "double")) {
 39 | 
 40 |         X <- matrix(data = rnorm(nRows * nCols, sd = 100), nrow = nRows, ncol = nCols)
 41 |         X[sample(seq_along(X), size = ceiling(length(X) * percentNA))] <- NA
 42 |         storage.mode(X) <- mode
 43 | 
 44 |         y <- rnorm(nRows, sd = 100)
 45 |         y[sample(seq_along(y), size = ceiling(length(y) * percentNA))] <- NA
 46 | 
 47 |         lsfit_res <- suppressWarnings(lsfit_test(X, y))
 48 |         lm_res <- lm_test(X, y)
 49 | 
 50 |         DATA <- BGData(geno = X, pheno = data.frame(
 51 |             y = y
 52 |         ))
 53 | 
 54 |         for (method in c("rayOLS", "lsfit", "lm")) {
 55 | 
 56 |             for (nCores in seq_len(2)) {
 57 | 
 58 |                 hasCores(nCores)
 59 | 
 60 |                 GWAS_res <- suppressWarnings(GWAS(formula = y ~ 1, data = DATA, method = method, nCores = nCores))
 61 | 
 62 |                 expect_equivalent(GWAS_res[, 1:4], lsfit_res)
 63 |                 expect_equivalent(GWAS_res[, 1:4], lm_res)
 64 | 
 65 |             }
 66 | 
 67 |         }
 68 | 
 69 |     }
 70 | 
 71 | })
 72 | 
 73 | test_that("GWAS with covariates", {
 74 | 
 75 |     for (mode in c("integer", "double")) {
 76 | 
 77 |         X <- matrix(data = rnorm(nRows * nCols, sd = 100), nrow = nRows, ncol = nCols)
 78 | 
 79 |         PCs <- svd(X, nu = 2, nv = 0)$u
 80 |         colnames(PCs) <- c("pc1", "pc2")
 81 |         PCs[sample(seq_along(PCs), size = ceiling(length(PCs) * percentNA))] <- NA
 82 | 
 83 |         X[sample(seq_along(X), size = ceiling(length(X) * percentNA))] <- NA
 84 |         storage.mode(X) <- mode
 85 | 
 86 |         y <- rnorm(nRows, sd = 100)
 87 |         y[sample(seq_along(y), size = ceiling(length(y) * percentNA))] <- NA
 88 | 
 89 |         lsfit_res <- suppressWarnings(lsfit_test(X, y, PCs))
 90 |         lm_res <- lm_test(X, y, PCs)
 91 | 
 92 |         DATA <- BGData(geno = X, pheno = data.frame(
 93 |             y = y,
 94 |             pc1 = PCs[, 1],
 95 |             pc2 = PCs[, 2]
 96 |         ))
 97 | 
 98 |         for (method in c("lsfit", "lm")) {
 99 | 
100 |             for (nCores in seq_len(2)) {
101 | 
102 |                 hasCores(nCores)
103 | 
104 |                 GWAS_res <- suppressWarnings(GWAS(formula = y ~ pc1 + pc2, data = DATA, method = method, nCores = nCores))
105 | 
106 |                 expect_equivalent(GWAS_res, lsfit_res)
107 |                 expect_equivalent(GWAS_res, lm_res)
108 | 
109 |             }
110 | 
111 |         }
112 | 
113 |     }
114 | 
115 | })
116 | 


--------------------------------------------------------------------------------
/tests/testthat/test-chunkedApply.R:
--------------------------------------------------------------------------------
 1 | context("chunkedApply")
 2 | 
 3 | set.seed(1)
 4 | 
 5 | nRows <- 5
 6 | nCols <- 10
 7 | nNAs <- 5
 8 | 
 9 | X <- matrix(data = rnorm(nRows * nCols, sd = 100), nrow = nRows, ncol = nCols)
10 | X[sample(1:length(X), size = nNAs)] <- NA
11 | 
12 | test_that("chunkedMap", {
13 | 
14 |     for (nCores in seq_len(2)) {
15 | 
16 |         hasCores(nCores)
17 | 
18 |         for (chunkSize in c(5, 10)) {
19 | 
20 |             expect_equal(unlist(chunkedMap(X = X, FUN = rowSums, chunkBy = 1, chunkSize = chunkSize, nCores = nCores)), rowSums(X))
21 |             expect_equal(unlist(chunkedMap(X = X, FUN = colSums, chunkSize = chunkSize, nCores = nCores)), colSums(X))
22 | 
23 |             expect_equal(unlist(chunkedMap(X = X, FUN = rowSums, chunkBy = 1, i = c(1, 3), chunkSize = chunkSize, nCores = nCores)), rowSums(X[c(1, 3), ]))
24 |             expect_equal(unlist(chunkedMap(X = X, FUN = colSums, i = c(1, 3), chunkSize = chunkSize, nCores = nCores)), colSums(X[c(1, 3), ]))
25 | 
26 |             expect_equal(unlist(chunkedMap(X = X, FUN = rowSums, chunkBy = 1, j = c(1, 3, 5), chunkSize = chunkSize, nCores = nCores)), rowSums(X[, c(1, 3, 5)]))
27 |             expect_equal(unlist(chunkedMap(X = X, FUN = colSums, j = c(1, 3, 5), chunkSize = chunkSize, nCores = nCores)), colSums(X[, c(1, 3, 5)]))
28 | 
29 |             expect_equal(unlist(chunkedMap(X = X, FUN = rowSums, chunkBy = 1, i = c(1, 3), j = c(1, 3, 5), chunkSize = chunkSize, nCores = nCores)), rowSums(X[c(1, 3), c(1, 3, 5)]))
30 |             expect_equal(unlist(chunkedMap(X = X, FUN = colSums, i = c(1, 3), j = c(1, 3, 5), chunkSize = chunkSize, nCores = nCores)), colSums(X[c(1, 3), c(1, 3, 5)]))
31 | 
32 |         }
33 |     }
34 | 
35 | })
36 | 
37 | test_that("chunkedApply", {
38 | 
39 |     for (nCores in seq_len(2)) {
40 | 
41 |         hasCores(nCores)
42 | 
43 |         for (chunkSize in c(5, 10)) {
44 | 
45 |             expect_equal(chunkedApply(X = X, MARGIN = 1, FUN = sum, chunkSize = chunkSize, nCores = nCores), apply(X, 1, sum))
46 |             expect_equal(chunkedApply(X = X, MARGIN = 2, FUN = sum, chunkSize = chunkSize, nCores = nCores), apply(X, 2, sum))
47 | 
48 |             expect_equal(chunkedApply(X = X, MARGIN = 1, FUN = sum, i = c(1, 3), chunkSize = chunkSize, nCores = nCores), apply(X[c(1, 3), ], 1, sum))
49 |             expect_equal(chunkedApply(X = X, MARGIN = 2, FUN = sum, i = c(1, 3), chunkSize = chunkSize, nCores = nCores), apply(X[c(1, 3), ], 2, sum))
50 | 
51 |             expect_equal(chunkedApply(X = X, MARGIN = 1, FUN = sum, j = c(1, 3, 5), chunkSize = chunkSize, nCores = nCores), apply(X[, c(1, 3, 5)], 1, sum))
52 |             expect_equal(chunkedApply(X = X, MARGIN = 2, FUN = sum, j = c(1, 3, 5), chunkSize = chunkSize, nCores = nCores), apply(X[, c(1, 3, 5)], 2, sum))
53 | 
54 |             expect_equal(chunkedApply(X = X, MARGIN = 1, FUN = sum, i = c(1, 3), j = c(1, 3, 5), chunkSize = chunkSize, nCores = nCores), apply(X[c(1, 3), c(1, 3, 5)], 1, sum))
55 |             expect_equal(chunkedApply(X = X, MARGIN = 2, FUN = sum, i = c(1, 3), j = c(1, 3, 5), chunkSize = chunkSize, nCores = nCores), apply(X[c(1, 3), c(1, 3, 5)], 2, sum))
56 | 
57 |         }
58 | 
59 |     }
60 | 
61 | })
62 | 


--------------------------------------------------------------------------------
/tests/testthat/test-getG.R:
--------------------------------------------------------------------------------
  1 | context("getG")
  2 | 
  3 | for (nCores in seq_len(2)) {
  4 | 
  5 |     test_that(paste("getGi", "on", nCores, "cores"), {
  6 |         # Compares getG() (without i2) against tcrossprod() references
  7 |         hasCores(nCores)
  8 | 
  9 |         n <- 10
 10 |         p <- 100
 11 |         X <- matrix(data = rnorm(n * p), nrow = n, ncol = p)
 12 |         # A list keeps the NULL entry; c(NULL, ...) silently drops it
 13 |         for (chunkSize in list(NULL, p, ceiling(p / 3))) {
 14 | 
 15 |             # both scalings
 16 |             G <- tcrossprod(scale(X))
 17 |             G <- G / mean(diag(G))
 18 |             G2 <- getG(X = X, scale = TRUE, scaleG = TRUE, chunkSize = chunkSize, nCores = nCores)
 19 |             expect_equivalent(G, G2)
 20 | 
 21 |             # without scaling to average diagonal = 1 (scaleG)
 22 |             G <- tcrossprod(scale(X))
 23 |             G2 <- getG(X = X, scale = TRUE, scaleG = FALSE, chunkSize = chunkSize, nCores = nCores)
 24 |             expect_equivalent(G, G2)
 25 | 
 26 |             # without scaling columns, but scaling average diagonal = 1 (scaleG)
 27 |             G <- tcrossprod(scale(X, center = TRUE, scale = FALSE))
 28 |             G <- G / mean(diag(G))
 29 |             G2 <- getG(X = X, scale = FALSE, scaleG = TRUE, chunkSize = chunkSize, nCores = nCores)
 30 | 
 31 |             expect_equivalent(G, G2)
 32 | 
 33 |             # no scaling at all
 34 |             G <- tcrossprod(scale(X, center = TRUE, scale = FALSE))
 35 |             G2 <- getG(X = X, scale = FALSE, scaleG = FALSE, chunkSize = chunkSize, nCores = nCores)
 36 |             expect_equivalent(G, G2)
 37 | 
 38 |             # neither scaling nor centering
 39 |             G <- tcrossprod(X)
 40 |             G2 <- getG(X = X, center = FALSE, scale = FALSE, scaleG = FALSE, chunkSize = chunkSize, nCores = nCores)
 41 |             expect_equivalent(G, G2)
 42 | 
 43 |         }
 44 |         # With missing values in X, the result is expected to be free of NA
 45 |         X[sample(seq_along(X), size = 20)] <- NA
 46 |         G <- getG(X, nCores = nCores)
 47 |         expect_true(!any(is.na(G)))
 48 | 
 49 |     })
 50 | 
 51 |     test_that(paste("getGij", "on", nCores, "cores"), {
 52 |         # Compares getG() with i and i2 against sub-blocks of tcrossprod() references
 53 |         hasCores(nCores)
 54 | 
 55 |         n <- 10
 56 |         p <- 100
 57 |         X <- matrix(data = rnorm(n * p), nrow = n, ncol = p)
 58 |         # A list keeps the NULL entry; c(NULL, ...) silently drops it
 59 |         for (chunkSize in list(NULL, p, ceiling(p / 3))) {
 60 | 
 61 |             i <- sample(seq_len(nrow(X)), size = 3)
 62 |             i2 <- sample(seq_len(nrow(X)), size = 4)
 63 | 
 64 |             centers <- colMeans(X)
 65 |             scales <- apply(X, 2, sd) * sqrt((n - 1)/n)
 66 | 
 67 |             # all scalings
 68 |             G <- tcrossprod(scale(X))
 69 |             G <- G / mean(diag(G))
 70 |             G_12 <- getG(X = X, center = centers, scale = scales, scaleG = TRUE, i = i, i2 = i2, chunkSize = chunkSize, nCores = nCores)
 71 |             expect_equivalent(G[i, i2], G_12)
 72 | 
 73 |             G_12 <- getG(X = X, center = centers, scale = scales, scaleG = TRUE, i = i, i2 = i, chunkSize = chunkSize, nCores = nCores)
 74 |             expect_equivalent(G[i, i], G_12)
 75 | 
 76 |             # without scaling to average diagonal = 1
 77 |             G <- tcrossprod(scale(X) * sqrt(n/(n - 1)))
 78 |             G_12 <- getG(X = X, center = centers, scale = scales, scaleG = FALSE, i = i, i2 = i2, chunkSize = chunkSize, nCores = nCores)
 79 |             expect_equivalent(G[i, i2], G_12)
 80 | 
 81 |             G_12 <- getG(X = X, center = centers, scale = scales, scaleG = FALSE, i = i, i2 = i, chunkSize = chunkSize, nCores = nCores)
 82 |             expect_equivalent(G[i, i], G_12)
 83 | 
 84 |             # without scaling columns, but scaling average diagonal = 1
 85 |             scales <- rep(1, ncol(X))
 86 | 
 87 |             G <- tcrossprod(scale(X, center = TRUE, scale = FALSE))
 88 |             G <- G / ncol(X)
 89 |             G_12 <- getG(X = X, center = centers, scale = scales, scaleG = TRUE, i = i, i2 = i2, chunkSize = chunkSize, nCores = nCores)
 90 |             expect_equivalent(G[i, i2], G_12)
 91 | 
 92 |             G_12 <- getG(X = X, center = centers, scale = scales, scaleG = TRUE, i = i, i2 = i, chunkSize = chunkSize, nCores = nCores)
 93 |             expect_equivalent(G[i, i], G_12)
 94 | 
 95 |             # no scaling at all
 96 |             G <- tcrossprod(scale(X, center = TRUE, scale = FALSE))
 97 |             G_12 <- getG(X = X, center = centers, scale = scales, scaleG = FALSE, i = i, i2 = i2, chunkSize = chunkSize, nCores = nCores)
 98 |             expect_equivalent(G[i, i2], G_12)
 99 | 
100 |             G_12 <- getG(X = X, center = centers, scale = scales, scaleG = FALSE, i = i, i2 = i, chunkSize = chunkSize, nCores = nCores)
101 |             expect_equivalent(G[i, i], G_12)
102 | 
103 |         }
104 |     })
105 | 
106 |     test_that(paste("getG_symDMatrix", "on", nCores, "cores"), {
107 |         # Reference: cross-product of the scaled matrix, divided by its mean diagonal
108 |         hasCores(nCores)
109 | 
110 |         W <- matrix(data = rnorm(200), nrow = 10, ncol = 20)
111 |         expected <- tcrossprod(scale(W))
112 |         expected <- expected / mean(diag(expected))
113 |         blockSize <- ceiling(nrow(W) / 3)
114 |         actual <- getG_symDMatrix(X = W, blockSize = blockSize, folderOut = testDir(), nCores = nCores)
115 |         expect_equivalent(actual[], expected) # use equivalent to correct slight difference in NULL dimnames handling
116 | 
117 |     })
118 | 
119 | }
120 | 


--------------------------------------------------------------------------------
/tests/testthat/test-preprocess-int.R:
--------------------------------------------------------------------------------
  1 | context("preprocess for integers")
  2 | 
  3 | # Parameters
  4 | n <- 250 # number of rows
  5 | p <- 50 # number of columns
  6 | nCells <- n * p # not named 'length', which would mask base::length()
  7 | nmiss <- 100 # number of cells set to NA
  8 | 
  9 | # Data: integers from 0 to 9 with 'nmiss' randomly placed NAs
 10 | set.seed(4711)
 11 | X <- sample(0:9, size = nCells, replace = TRUE)
 12 | dim(X) <- c(n, p)
 13 | naCells <- sample(seq_len(nCells), size = nmiss) # not named 'missing', which would mask base::missing()
 14 | X[naCells] <- NA
 15 | 
 16 | # Column means and standard deviations of the non-missing values
 17 | centers <- colMeans(X, na.rm = TRUE)
 18 | scales <- apply(X, 2, sd, na.rm = TRUE)
 19 | 
 20 | # Every expectation compares a reference computed with base R against
 21 | # preprocess() called with the corresponding arguments. The expectations are
 22 | # grouped into named test_that() blocks so that a failure identifies the scenario.
 23 | 
 24 | test_that("preprocess without centering, scaling, or imputation", {
 25 | 
 26 |     expect_equal(
 27 |         scale(X, center = FALSE, scale = FALSE),
 28 |         preprocess(X, center = FALSE, scale = FALSE, impute = FALSE)
 29 |     )
 30 | 
 31 | })
 32 | 
 33 | # Tests without imputation
 34 | 
 35 | test_that("preprocess computes centers and scales", {
 36 | 
 37 |     expect_equal(
 38 |         scale(X, center = TRUE, scale = TRUE),
 39 |         preprocess(X, center = TRUE, scale = TRUE, impute = FALSE)
 40 |     )
 41 |     expect_equal(
 42 |         scale(X, center = TRUE, scale = FALSE),
 43 |         preprocess(X, center = TRUE, scale = FALSE, impute = FALSE)
 44 |     )
 45 |     expect_equal(
 46 |         scale(X, center = FALSE, scale = scales), # scale() uses root mean squares if 'center = FALSE'
 47 |         preprocess(X, center = FALSE, scale = TRUE, impute = FALSE)
 48 |     )
 49 | 
 50 | })
 51 | 
 52 | test_that("preprocess applies provided centers and scales", {
 53 | 
 54 |     expect_equal(
 55 |         scale(X, center = centers, scale = scales),
 56 |         preprocess(X, center = centers, scale = scales, impute = FALSE)
 57 |     )
 58 |     expect_equal(
 59 |         scale(X, center = centers, scale = FALSE),
 60 |         preprocess(X, center = centers, scale = FALSE, impute = FALSE)
 61 |     )
 62 |     expect_equal(
 63 |         scale(X, center = FALSE, scale = scales),
 64 |         preprocess(X, center = FALSE, scale = scales, impute = FALSE)
 65 |     )
 66 | 
 67 | })
 68 | 
 69 | test_that("preprocess computes scales when centers are provided", {
 70 | 
 71 |     expect_equal(
 72 |         scale(X, center = centers, scale = TRUE),
 73 |         preprocess(X, center = centers, scale = TRUE, impute = FALSE)
 74 |     )
 75 | 
 76 | })
 77 | 
 78 | test_that("preprocess computes centers when scales are provided", {
 79 | 
 80 |     expect_equal(
 81 |         scale(X, center = TRUE, scale = scales),
 82 |         preprocess(X, center = TRUE, scale = scales, impute = FALSE)
 83 |     )
 84 | 
 85 | })
 86 | 
 87 | # Tests with imputation
 88 | 
 89 | test_that("preprocess imputes by 0 when centers are computed", {
 90 | 
 91 |     expected <- scale(X, center = TRUE, scale = FALSE)
 92 |     expected[naCells] <- 0
 93 |     expect_equal(
 94 |         expected,
 95 |         preprocess(X, center = TRUE, scale = FALSE, impute = TRUE)
 96 |     )
 97 | 
 98 | })
 99 | 
100 | test_that("preprocess imputes by 0 when centers are provided", {
101 | 
102 |     expected <- scale(X, center = centers, scale = FALSE)
103 |     expected[naCells] <- 0
104 |     expect_equal(
105 |         expected,
106 |         preprocess(X, center = centers, scale = FALSE, impute = TRUE)
107 |     )
108 | 
109 | })
110 | 
111 | test_that("preprocess imputes by the column mean when not centering", {
112 | 
113 |     # expand the column means to one value per cell so they can be indexed like X
114 |     means <- rep(colMeans(X, na.rm = TRUE), each = n)
115 |     expected <- X
116 |     expected[naCells] <- means[naCells]
117 |     expect_equal(
118 |         expected,
119 |         preprocess(X, center = FALSE, scale = FALSE, impute = TRUE)
120 |     )
121 | 
122 | })
123 | 


--------------------------------------------------------------------------------
/tests/testthat/test-preprocess-real.R:
--------------------------------------------------------------------------------
  1 | context("preprocess for real numbers")
  2 | 
  3 | # Parameters
  4 | n <- 250 # number of rows
  5 | p <- 50 # number of columns
  6 | nCells <- n * p # not named 'length', which would mask base::length()
  7 | nmiss <- 100 # number of cells set to NA
  8 | 
  9 | # Data: draws from rnorm() with 'nmiss' randomly placed NAs
 10 | set.seed(4711)
 11 | X <- rnorm(nCells)
 12 | dim(X) <- c(n, p)
 13 | naCells <- sample(seq_len(nCells), size = nmiss) # not named 'missing', which would mask base::missing()
 14 | X[naCells] <- NA
 15 | 
 16 | # Column means and standard deviations of the non-missing values
 17 | centers <- colMeans(X, na.rm = TRUE)
 18 | scales <- apply(X, 2, sd, na.rm = TRUE)
 19 | 
 20 | # Every expectation compares a reference computed with base R against
 21 | # preprocess() called with the corresponding arguments. The expectations are
 22 | # grouped into named test_that() blocks so that a failure identifies the scenario.
 23 | 
 24 | test_that("preprocess without centering, scaling, or imputation", {
 25 | 
 26 |     expect_equal(
 27 |         scale(X, center = FALSE, scale = FALSE),
 28 |         preprocess(X, center = FALSE, scale = FALSE, impute = FALSE)
 29 |     )
 30 | 
 31 | })
 32 | 
 33 | # Tests without imputation
 34 | 
 35 | test_that("preprocess computes centers and scales", {
 36 | 
 37 |     expect_equal(
 38 |         scale(X, center = TRUE, scale = TRUE),
 39 |         preprocess(X, center = TRUE, scale = TRUE, impute = FALSE)
 40 |     )
 41 |     expect_equal(
 42 |         scale(X, center = TRUE, scale = FALSE),
 43 |         preprocess(X, center = TRUE, scale = FALSE, impute = FALSE)
 44 |     )
 45 |     expect_equal(
 46 |         scale(X, center = FALSE, scale = scales), # scale() uses root mean squares if 'center = FALSE'
 47 |         preprocess(X, center = FALSE, scale = TRUE, impute = FALSE)
 48 |     )
 49 | 
 50 | })
 51 | 
 52 | test_that("preprocess applies provided centers and scales", {
 53 | 
 54 |     expect_equal(
 55 |         scale(X, center = centers, scale = scales),
 56 |         preprocess(X, center = centers, scale = scales, impute = FALSE)
 57 |     )
 58 |     expect_equal(
 59 |         scale(X, center = centers, scale = FALSE),
 60 |         preprocess(X, center = centers, scale = FALSE, impute = FALSE)
 61 |     )
 62 |     expect_equal(
 63 |         scale(X, center = FALSE, scale = scales),
 64 |         preprocess(X, center = FALSE, scale = scales, impute = FALSE)
 65 |     )
 66 | 
 67 | })
 68 | 
 69 | test_that("preprocess computes scales when centers are provided", {
 70 | 
 71 |     expect_equal(
 72 |         scale(X, center = centers, scale = TRUE),
 73 |         preprocess(X, center = centers, scale = TRUE, impute = FALSE)
 74 |     )
 75 | 
 76 | })
 77 | 
 78 | test_that("preprocess computes centers when scales are provided", {
 79 | 
 80 |     expect_equal(
 81 |         scale(X, center = TRUE, scale = scales),
 82 |         preprocess(X, center = TRUE, scale = scales, impute = FALSE)
 83 |     )
 84 | 
 85 | })
 86 | 
 87 | # Tests with imputation
 88 | 
 89 | test_that("preprocess imputes by 0 when centers are computed", {
 90 | 
 91 |     expected <- scale(X, center = TRUE, scale = FALSE)
 92 |     expected[naCells] <- 0
 93 |     expect_equal(
 94 |         expected,
 95 |         preprocess(X, center = TRUE, scale = FALSE, impute = TRUE)
 96 |     )
 97 | 
 98 | })
 99 | 
100 | test_that("preprocess imputes by 0 when centers are provided", {
101 | 
102 |     expected <- scale(X, center = centers, scale = FALSE)
103 |     expected[naCells] <- 0
104 |     expect_equal(
105 |         expected,
106 |         preprocess(X, center = centers, scale = FALSE, impute = TRUE)
107 |     )
108 | 
109 | })
110 | 
111 | test_that("preprocess imputes by the column mean when not centering", {
112 | 
113 |     # expand the column means to one value per cell so they can be indexed like X
114 |     means <- rep(colMeans(X, na.rm = TRUE), each = n)
115 |     expected <- X
116 |     expected[naCells] <- means[naCells]
117 |     expect_equal(
118 |         expected,
119 |         preprocess(X, center = FALSE, scale = FALSE, impute = TRUE)
120 |     )
121 | 
122 | })
123 | 


--------------------------------------------------------------------------------
/tests/testthat/test-summarize.R:
--------------------------------------------------------------------------------
 1 | context("summarize")
 2 | 
 3 | set.seed(1) # fixed seed so that the random matrices generated below are reproducible
 4 | 
 5 | nRows <- 5 # number of rows of each generated test matrix
 6 | nCols <- 10 # number of columns of each generated test matrix
 7 | percentNA <- 0.15 # fraction of cells replaced by NA (cell count is truncated to an integer)
 8 | 
 9 | summarize_test <- function(X) {
10 |     # Reference implementation in plain R: one row per column of X holding the
11 |     # proportion of NAs, the sum of the observed values divided by twice their
12 |     # count, and the standard deviation of the observed values.
13 |     columns <- seq_len(ncol(X))
14 |     nObs <- nrow(X)
15 |     nNA <- vapply(columns, function(j) sum(is.na(X[, j])), integer(1))
16 |     totals <- vapply(columns, function(j) sum(X[, j], na.rm = TRUE), double(1))
17 |     sds <- vapply(columns, function(j) sd(X[, j], na.rm = TRUE), double(1))
18 |     data.frame(
19 |         freq_na = nNA / nObs,
20 |         allele_freq = totals / ((nObs - nNA) * 2),
21 |         sd = sds
22 |     )
23 | }
24 | 
25 | test_that("summarize", {
26 | 
27 |     # summarize() must agree with the plain-R reference for both storage modes
28 |     for (storageMode in c("integer", "double")) {
29 | 
30 |         X <- matrix(data = rnorm(nRows * nCols, sd = 100), nrow = nRows, ncol = nCols)
31 |         naCells <- sample(seq_along(X), size = as.integer(length(X) * percentNA))
32 |         X[naCells] <- NA
33 |         storage.mode(X) <- storageMode
34 |         expected <- summarize_test(X)
35 | 
36 |         for (nCores in c(1L, 2L)) {
37 | 
38 |             # helper defined outside this file; presumably skips when too few cores are available -- TODO confirm
39 |             hasCores(nCores)
40 |             expect_equal(summarize(X, nCores = nCores), expected)
41 | 
42 |         }
43 | 
44 |     }
45 | 
46 | })
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
 1 | context("utils")
 2 | 
 3 | test_that("normalizeType", {
 4 | 
 5 |     # typeof() of whatever normalizeType() returns for the given input
 6 |     normalizedTypeOf <- function(input) typeof(BGData:::normalizeType(input))
 7 | 
 8 |     # a type name and an empty vector of that type are both expected to give that type
 9 |     for (type in c("double", "integer", "character", "complex")) {
10 |         expect_equal(normalizedTypeOf(type), type)
11 |         expect_equal(normalizedTypeOf(vector(mode = type, length = 0)), type)
12 |     }
13 |     expect_warning(BGData:::normalizeType("test")) # "test" is not a type name: a warning is expected
14 |     expect_equal(suppressWarnings(normalizedTypeOf("test")), "character")
15 |     expect_equal(normalizedTypeOf(1), "double")
16 |     expect_equal(normalizedTypeOf(1L), "integer")
17 | 
18 | })
19 | 


--------------------------------------------------------------------------------