├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── R ├── RcppExports.R ├── bigmemory_methods.R ├── fastglm.R ├── fit_glm.R └── glm_methods.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── cran-comments.md ├── docs ├── articles │ ├── Using-the-fastglm-Package.html │ ├── index.html │ └── quick-usage-guide-to-the-fastglm-package.html ├── authors.html ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── deviance.html │ ├── family.html │ ├── fastglm.html │ ├── fastglmPure.html │ ├── index.html │ ├── logLik.html │ ├── predict.fastglm.html │ ├── print.html │ ├── residuals.html │ └── summary.html ├── fastglm.Rproj ├── inst └── include │ ├── bigmemory.h │ ├── glm.h │ ├── glm_base.h │ └── glm_big.h ├── man ├── deviance.Rd ├── extract-methods.Rd ├── family.Rd ├── fastglm.Rd ├── fastglmPure.Rd ├── logLik.Rd ├── predict.fastglm.Rd ├── print.Rd ├── residuals.Rd └── summary.Rd ├── paper ├── paper.bib └── paper.md ├── src ├── Makevars ├── RcppExports.cpp ├── bigmemory.cpp ├── colmin_colmax.cpp └── fit_glm_dense.cpp └── vignettes ├── .gitignore ├── gen_data-1.png └── quick-usage-guide-to-the-fastglm-package.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^docs$ 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^.*\.Rhistory$ 5 | ^cran-comments.md$ 6 | ^NEWS.md$ 7 | README.Rmd 8 | README.html 9 | ^\.travis\.yml$ 10 | ^/\.gitattributes$ 11 | ^.*\docs$ 12 | ^\_pkgdown\.yml$ 13 | docs 14 | ^_pkgdown\.yml$ 15 | ^paper$ 16 | ^.*\.*.bk$ 17 | ^.*\.*.desc$ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | inst/doc 9 | .DS_Store 10 | 11 | *.bk 12 | *.desc -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | r: 4 | - release 5 | 6 | r_check_args: --as-cran 7 | 8 | warnings_are_errors: true 9 | 10 | sudo: false 11 | 12 | notifications: 13 | email: 14 | on_success: change 15 | on_failure: always 16 | 17 | os: 18 | - linux 19 | 20 | 21 | r_packages: 22 | - covr 23 | 24 | cache: packages 25 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: fastglm 2 | Type: Package 3 | Title: Fast and Stable Fitting of Generalized Linear Models using 'RcppEigen' 4 | Version: 0.0.3 5 | Authors@R: c( 6 | person("Jared", "Huling", , "jaredhuling@gmail.com", c("aut", "cre")), 7 | person("Douglas", "Bates", , , c("cph")), 8 | person("Dirk", "Eddelbuettel", , , c("cph")), 9 | person("Romain", "Francois", , , c("cph")), 10 | person("Yixuan", "Qiu", , , c("cph")) 11 | ) 12 | Maintainer: Jared Huling 13 | Description: Fits generalized linear models efficiently using 'RcppEigen'. The iteratively reweighted least squares 14 | implementation utilizes the step-halving approach of Marschner (2011) to help safeguard 15 | against convergence issues. 16 | BugReports: https://github.com/jaredhuling/fastglm/issues 17 | License: GPL (>= 2) 18 | Encoding: UTF-8 19 | Imports: 20 | Rcpp (>= 0.12.13), 21 | methods 22 | Depends: 23 | bigmemory 24 | LinkingTo: 25 | Rcpp, 26 | RcppEigen, 27 | BH, 28 | bigmemory 29 | RoxygenNote: 7.2.0 30 | Suggests: knitr, 31 | rmarkdown, 32 | glm2 33 | VignetteBuilder: knitr 34 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(deviance,fastglm) 4 | S3method(family,fastglm) 5 | S3method(fastglm,default) 6 | S3method(logLik,fastglm) 7 | S3method(predict,fastglm) 8 | S3method(print,fastglm) 9 | S3method(residuals,fastglm) 10 | S3method(summary,fastglm) 11 | export(fastglm) 12 | export(fastglmPure) 13 | import(methods) 14 | import(stats) 15 | importFrom(Rcpp,evalCpp) 16 | importFrom(bigmemory,as.big.matrix) 17 | importFrom(bigmemory,is.big.matrix) 18 | importFrom(methods,new) 19 | useDynLib(fastglm, .registration = TRUE) 20 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | colMax_dense <- function(A) { 5 | .Call(`_fastglm_colMax_dense`, A) 6 | } 7 | 8 | colMin_dense <- function(A) { 9 | .Call(`_fastglm_colMin_dense`, A) 10 | } 11 | 12 | fit_glm <- function(x, y, weights, offset, start, mu, eta, var, mu_eta, linkinv, dev_resids, valideta, validmu, type, tol, maxit) { 13 | .Call(`_fastglm_fit_glm`, x, y, weights, offset, start, mu, eta, var, mu_eta, linkinv, dev_resids, valideta, validmu, type, tol, maxit) 14 | } 15 | 16 | fit_big_glm <- function(x, y, weights, offset, start, mu, eta, var, mu_eta, linkinv, dev_resids, valideta, validmu, type, tol, maxit) { 17 | .Call(`_fastglm_fit_big_glm`, x, y, weights, offset, start, mu, eta, var, mu_eta, linkinv, dev_resids, valideta, validmu, type, tol, maxit) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /R/bigmemory_methods.R: -------------------------------------------------------------------------------- 1 | big.crossprod <- function(x) 2 | { 3 | if (!is.big.matrix(x)) 4 | { 5 | stop("object must be a big.matrix object") 6 | } 7 | .Call("crossprod_big", x@address) 8 | } 9 | 10 | big.colSums <- function(x) 11 | { 12 | if (!is.big.matrix(x)) 13 | { 14 | stop("object must be a big.matrix object") 15 | } 16 | .Call("colsums_big", x@address) 17 | } 18 | 19 | big.colMax <- function(x) 20 | { 21 | if (!is.big.matrix(x)) 22 | { 23 | stop("object must be a big.matrix object") 24 | } 25 | .Call("colmax_big", x@address) 26 | } 27 | 28 | big.colMin <- function(x) 29 | { 30 | if (!is.big.matrix(x)) 31 | { 32 | stop("object must be a big.matrix object") 33 | } 34 | .Call("colmin_big", x@address) 35 | } 36 | 37 | 38 | #' big.matrix prod 39 | #' 40 | #' @param x big.matrix 41 | #' @param y numeric vector 42 | #' @docType methods 43 | #' @import methods 44 | #' @importFrom methods new 45 | #' @importFrom bigmemory as.big.matrix is.big.matrix 46 | #' @rdname extract-methods 47 | setMethod("%*%",signature(x="big.matrix", y="vector"), 48 | function(x, y) 49 | { 50 | if(dim(x)[2] != length(y)) stop("non-conformant matrices") 51 | return( .Call("prod_vec_big", x@address, y) ) 52 | }, 53 | valueClass="vector" 54 | ) 55 | 56 | #' big.matrix prod 57 | #' 58 | #' @docType methods 59 | #' @rdname extract-methods 60 | setMethod("%*%",signature(x="vector", y="big.matrix"), 61 | function(x, y) 62 | { 63 | if(dim(y)[2] != length(x)) stop("non-conformant matrices") 64 | return( .Call("prod_vec_big_right", x, y@address) ) 65 | }, 66 | valueClass="vector" 67 | ) 68 | -------------------------------------------------------------------------------- /R/fastglm.R: -------------------------------------------------------------------------------- 1 | #' @importFrom Rcpp evalCpp 2 | #' @import stats 3 | #' @useDynLib fastglm, .registration = TRUE 4 | NULL -------------------------------------------------------------------------------- /R/fit_glm.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' fast generalized linear model fitting 4 | #' 5 | #' @param x input model matrix. Must be a matrix object 6 | #' @param y numeric response vector of length nobs. 7 | #' @param family a description of the error distribution and link function to be used in the model. 8 | #' For \code{fastglmPure} this can only be the result of a call to a family function. 9 | #' (See \code{\link[stats]{family}} for details of family functions.) 10 | #' @param weights an optional vector of 'prior weights' to be used in the fitting process. Should be a numeric vector. 11 | #' @param offset this can be used to specify an a priori known component to be included in the linear predictor during fitting. 12 | #' This should be a numeric vector of length equal to the number of cases 13 | #' @param start starting values for the parameters in the linear predictor. 14 | #' @param etastart starting values for the linear predictor. 15 | #' @param mustart values for the vector of means. 16 | #' @param method an integer scalar with value 0 for the column-pivoted QR decomposition, 1 for the unpivoted QR decomposition, 17 | #' 2 for the LLT Cholesky, 3 for the LDLT Cholesky, 4 for the full pivoted QR decomposition, 5 for the Bidiagonal Divide and 18 | #' Conquer SVD 19 | #' @param tol threshold tolerance for convergence. Should be a positive real number 20 | #' @param maxit maximum number of IRLS iterations. Should be an integer 21 | #' @return A list with the elements 22 | #' \item{coefficients}{a vector of coefficients} 23 | #' \item{se}{a vector of the standard errors of the coefficient estimates} 24 | #' \item{rank}{a scalar denoting the computed rank of the model matrix} 25 | #' \item{df.residual}{a scalar denoting the degrees of freedom in the model} 26 | #' \item{residuals}{the vector of residuals} 27 | #' \item{s}{a numeric scalar - the root mean square for residuals} 28 | #' \item{fitted.values}{the vector of fitted values} 29 | #' @export 30 | #' @examples 31 | #' 32 | #' set.seed(1) 33 | #' x <- matrix(rnorm(1000 * 25), ncol = 25) 34 | #' eta <- 0.1 + 0.25 * x[,1] - 0.25 * x[,3] + 0.75 * x[,5] -0.35 * x[,6] #0.25 * x[,1] - 0.25 * x[,3] 35 | #' y <- 1 * (eta > rnorm(1000)) 36 | #' 37 | #' yp <- rpois(1000, eta ^ 2) 38 | #' yg <- rgamma(1000, exp(eta) * 1.75, 1.75) 39 | #' 40 | #' # binomial 41 | #' system.time(gl1 <- glm.fit(x, y, family = binomial())) 42 | #' 43 | #' system.time(gf1 <- fastglmPure(x, y, family = binomial(), tol = 1e-8)) 44 | #' 45 | #' system.time(gf2 <- fastglmPure(x, y, family = binomial(), method = 1, tol = 1e-8)) 46 | #' 47 | #' system.time(gf3 <- fastglmPure(x, y, family = binomial(), method = 2, tol = 1e-8)) 48 | #' 49 | #' system.time(gf4 <- fastglmPure(x, y, family = binomial(), method = 3, tol = 1e-8)) 50 | #' 51 | #' max(abs(coef(gl1) - gf1$coef)) 52 | #' max(abs(coef(gl1) - gf2$coef)) 53 | #' max(abs(coef(gl1) - gf3$coef)) 54 | #' max(abs(coef(gl1) - gf4$coef)) 55 | #' 56 | #' # poisson 57 | #' system.time(gl1 <- glm.fit(x, yp, family = poisson(link = "log"))) 58 | #' 59 | #' system.time(gf1 <- fastglmPure(x, yp, family = poisson(link = "log"), tol = 1e-8)) 60 | #' 61 | #' system.time(gf2 <- fastglmPure(x, yp, family = poisson(link = "log"), method = 1, tol = 1e-8)) 62 | #' 63 | #' system.time(gf3 <- fastglmPure(x, yp, family = poisson(link = "log"), method = 2, tol = 1e-8)) 64 | #' 65 | #' system.time(gf4 <- fastglmPure(x, yp, family = poisson(link = "log"), method = 3, tol = 1e-8)) 66 | #' 67 | #' max(abs(coef(gl1) - gf1$coef)) 68 | #' max(abs(coef(gl1) - gf2$coef)) 69 | #' max(abs(coef(gl1) - gf3$coef)) 70 | #' max(abs(coef(gl1) - gf4$coef)) 71 | #' 72 | #' # gamma 73 | #' system.time(gl1 <- glm.fit(x, yg, family = Gamma(link = "log"))) 74 | #' 75 | #' system.time(gf1 <- fastglmPure(x, yg, family = Gamma(link = "log"), tol = 1e-8)) 76 | #' 77 | #' system.time(gf2 <- fastglmPure(x, yg, family = Gamma(link = "log"), method = 1, tol = 1e-8)) 78 | #' 79 | #' system.time(gf3 <- fastglmPure(x, yg, family = Gamma(link = "log"), method = 2, tol = 1e-8)) 80 | #' 81 | #' system.time(gf4 <- fastglmPure(x, yg, family = Gamma(link = "log"), method = 3, tol = 1e-8)) 82 | #' 83 | #' max(abs(coef(gl1) - gf1$coef)) 84 | #' max(abs(coef(gl1) - gf2$coef)) 85 | #' max(abs(coef(gl1) - gf3$coef)) 86 | #' max(abs(coef(gl1) - gf4$coef)) 87 | #' 88 | fastglmPure <- function(x, y, 89 | family = gaussian(), 90 | weights = rep(1, NROW(y)), 91 | offset = rep(0, NROW(y)), 92 | start = NULL, 93 | etastart = NULL, 94 | mustart = NULL, 95 | method = 0L, 96 | tol = 1e-7, 97 | maxit = 100L) 98 | { 99 | weights <- as.vector(weights) 100 | offset <- as.vector(offset) 101 | 102 | if (is.big.matrix(x)) 103 | { 104 | is_big_matrix <- TRUE 105 | if (method != 2 & method != 3) 106 | { 107 | method <- 3 108 | warning("for big.matrix objects, 'method' must either be 2 (for LLT) or 3 (for LDLT) -- 'method' changed to 3.") 109 | } 110 | } else if (is.matrix(x)) 111 | { 112 | is_big_matrix <- FALSE 113 | } else 114 | { 115 | stop("x must be either a matrix or a big.matrix object") 116 | } 117 | 118 | 119 | stopifnot(is.numeric(y), 120 | is.numeric(weights), 121 | is.numeric(offset), 122 | NROW(y) == nrow(x), 123 | NROW(y) == NROW(weights), 124 | NROW(y) == NROW(offset), 125 | is.numeric(method), 126 | is.numeric(tol), 127 | is.numeric(maxit), 128 | tol[1] > 0, 129 | maxit[1] > 0 130 | ) 131 | 132 | nobs <- n <- NROW(y) 133 | nvars <- NCOL(x) 134 | if(is.null(family$family)) 135 | { 136 | print(family) 137 | stop("'family' not recognized") 138 | } 139 | 140 | if( any(weights < 0) ) stop("negative weights not allowed") 141 | 142 | if (method[1] > 5L || method[1] < 0) 143 | { 144 | stop("Invalid decomposition method specified. Choose from 0, 1, 2, 3, 4, or 5.") 145 | } 146 | 147 | cnames <- colnames(x) 148 | 149 | # from glm 150 | variance <- family$variance 151 | dev.resids <- family$dev.resids 152 | aic <- family$aic 153 | linkinv <- family$linkinv 154 | mu.eta <- family$mu.eta 155 | 156 | unless.null <- function(x, if.null) if(is.null(x)) if.null else x 157 | valideta <- unless.null(family$valideta, function(eta) TRUE) 158 | validmu <- unless.null(family$validmu, function(mu) TRUE) 159 | 160 | 161 | if(is.null(mustart)) 162 | { 163 | ## calculates mustart and may change y and weights and set n (!) 164 | eval(family$initialize) 165 | } else 166 | { 167 | mukeep <- mustart 168 | eval(family$initialize) 169 | mustart <- mukeep 170 | } 171 | 172 | y <- as.numeric(y) 173 | 174 | coefold <- NULL 175 | eta <- 176 | if(!is.null(etastart)) { 177 | etastart 178 | } else if(!is.null(start)) 179 | { 180 | if (length(start) != nvars) 181 | { 182 | stop(gettextf("length of 'start' should equal %d", nvars), 183 | domain = NA) 184 | } else 185 | { 186 | coefold <- start 187 | offset + as.vector(x %*% start) 188 | } 189 | } else family$linkfun(mustart) 190 | mu <- linkinv(eta) 191 | 192 | if (!(validmu(mu) && valideta(eta))) 193 | stop("cannot find valid starting values: please specify some", call. = FALSE) 194 | 195 | if (is.null(start)) start <- rep(0, nvars) 196 | 197 | if (!is_big_matrix) 198 | { 199 | res <- fit_glm(x, drop(y), drop(weights), drop(offset), 200 | drop(start), drop(mu), drop(eta), 201 | family$variance, family$mu.eta, family$linkinv, family$dev.resids, 202 | family$valideta, family$validmu, 203 | as.integer(method[1]), as.double(tol[1]), as.integer(maxit[1]) ) 204 | 205 | res$intercept <- any(is.int <- colMax_dense(x) == colMin_dense(x)) 206 | } else 207 | { 208 | res <- fit_big_glm(x@address, drop(y), drop(weights), drop(offset), 209 | drop(start), drop(mu), drop(eta), 210 | family$variance, family$mu.eta, family$linkinv, family$dev.resids, 211 | family$valideta, family$validmu, 212 | as.integer(method[1]), as.double(tol[1]), as.integer(maxit[1]) ) 213 | 214 | res$intercept <- any(is.int <- big.colMax(x) == big.colMin(x)) 215 | } 216 | 217 | if (!res$converged) 218 | { 219 | warning("fit_glm: algorithm did not converge", call. = FALSE) 220 | } 221 | 222 | eps <- 10*.Machine$double.eps 223 | if (family$family == "binomial") 224 | { 225 | if (any(res$fitted.values > 1 - eps) || any(res$fitted.values < eps)) 226 | warning("fit_glm: fitted probabilities numerically 0 or 1 occurred", call. = FALSE) 227 | } 228 | if (family$family == "poisson") 229 | { 230 | if (any(res$fitted.values < eps)) 231 | warning("fit_glm: fitted rates numerically 0 occurred", call. = FALSE) 232 | } 233 | 234 | if (is.null(cnames)) 235 | { 236 | ncx <- ncol(x) 237 | if (res$intercept) 238 | { 239 | which.int <- which(is.int) 240 | cnames <- paste0("X", 1:(ncx - 1) ) 241 | names(res$coefficients) <- 1:ncx 242 | names(res$coefficients)[-which.int] <- cnames 243 | names(res$coefficients)[which.int] <- "(Intercept)" 244 | } else 245 | { 246 | names(res$coefficients) <- paste0("X", 1:ncx) 247 | } 248 | } else 249 | { 250 | names(res$coefficients) <- cnames 251 | } 252 | 253 | res$family <- family 254 | res$prior.weights <- weights 255 | res$y <- y 256 | res$n <- n 257 | res 258 | } 259 | 260 | #' fast generalized linear model fitting 261 | #' 262 | #' @param x input model matrix. Must be a matrix object 263 | #' @param y numeric response vector of length nobs. 264 | #' @param family a description of the error distribution and link function to be used in the model. 265 | #' For \code{fastglm} this can be a character string naming a family function, a family function or the 266 | #' result of a call to a family function. For \code{fastglmPure} only the third option is supported. 267 | #' (See \code{\link[stats]{family}} for details of family functions.) 268 | #' @param weights an optional vector of 'prior weights' to be used in the fitting process. Should be a numeric vector. 269 | #' @param offset this can be used to specify an a priori known component to be included in the linear predictor during fitting. 270 | #' This should be a numeric vector of length equal to the number of cases 271 | #' @param start starting values for the parameters in the linear predictor. 272 | #' @param etastart starting values for the linear predictor. 273 | #' @param mustart values for the vector of means. 274 | #' @param method an integer scalar with value 0 for the column-pivoted QR decomposition, 1 for the unpivoted QR decomposition, 275 | #' 2 for the LLT Cholesky, or 3 for the LDLT Cholesky 276 | #' @param tol threshold tolerance for convergence. Should be a positive real number 277 | #' @param maxit maximum number of IRLS iterations. Should be an integer 278 | #' @return A list with the elements 279 | #' \item{coefficients}{a vector of coefficients} 280 | #' \item{se}{a vector of the standard errors of the coefficient estimates} 281 | #' \item{rank}{a scalar denoting the computed rank of the model matrix} 282 | #' \item{df.residual}{a scalar denoting the degrees of freedom in the model} 283 | #' \item{residuals}{the vector of residuals} 284 | #' \item{s}{a numeric scalar - the root mean square for residuals} 285 | #' \item{fitted.values}{the vector of fitted values} 286 | #' @export 287 | #' @examples 288 | #' 289 | #' x <- matrix(rnorm(10000 * 100), ncol = 100) 290 | #' y <- 1 * (0.25 * x[,1] - 0.25 * x[,3] > rnorm(10000)) 291 | #' 292 | #' system.time(gl1 <- glm.fit(x, y, family = binomial())) 293 | #' 294 | #' system.time(gf1 <- fastglm(x, y, family = binomial())) 295 | #' 296 | #' system.time(gf2 <- fastglm(x, y, family = binomial(), method = 1)) 297 | #' 298 | #' system.time(gf3 <- fastglm(x, y, family = binomial(), method = 2)) 299 | #' 300 | #' system.time(gf4 <- fastglm(x, y, family = binomial(), method = 3)) 301 | #' 302 | #' max(abs(coef(gl1) - gf1$coef)) 303 | #' max(abs(coef(gl1) - gf2$coef)) 304 | #' max(abs(coef(gl1) - gf3$coef)) 305 | #' max(abs(coef(gl1) - gf4$coef)) 306 | #' 307 | #' 308 | #' \dontrun{ 309 | #' nrows <- 50000 310 | #' ncols <- 50 311 | #' bkFile <- "bigmat2.bk" 312 | #' descFile <- "bigmatk2.desc" 313 | #' bigmat <- filebacked.big.matrix(nrow=nrows, ncol=ncols, type="double", 314 | #' backingfile=bkFile, backingpath=".", 315 | #' descriptorfile=descFile, 316 | #' dimnames=c(NULL,NULL)) 317 | #' for (i in 1:ncols) bigmat[,i] = rnorm(nrows)*i 318 | #' y <- 1*(rnorm(nrows) + bigmat[,1] > 0) 319 | #' 320 | #' system.time(gfb1 <- fastglm(bigmat, y, family = binomial(), method = 3)) 321 | #' } 322 | #' 323 | fastglm <- function(x, ...) UseMethod("fastglm") 324 | 325 | 326 | #' bigLm default 327 | #' 328 | #' @param ... not used 329 | #' @rdname fastglm 330 | #' @method fastglm default 331 | #' @export 332 | fastglm.default <- function(x, y, 333 | family = gaussian(), 334 | weights = NULL, 335 | offset = NULL, 336 | start = NULL, 337 | etastart = NULL, 338 | mustart = NULL, 339 | method = 0L, tol = 1e-8, maxit = 100L, 340 | ...) 341 | { 342 | ## family 343 | if(is.character(family)) 344 | { 345 | family <- get(family, mode = "function", envir = parent.frame()) 346 | } 347 | if(is.function(family)) family <- family() 348 | if(is.null(family$family)) 349 | { 350 | print(family) 351 | stop("'family' not recognized") 352 | } 353 | 354 | #y <- as.numeric(y) 355 | 356 | ## avoid problems with 1D arrays, but keep names 357 | if(length(dim(y)) == 1L) 358 | { 359 | nm <- rownames(y) 360 | dim(y) <- NULL 361 | if(!is.null(nm)) names(y) <- nm 362 | } 363 | 364 | nobs <- NROW(y) 365 | 366 | aic <- family$aic 367 | 368 | if (is.null(weights)) weights <- rep(1, nobs) 369 | if (is.null(offset)) offset <- rep(0, nobs) 370 | 371 | res <- fastglmPure(x, y, family, weights, offset, 372 | start, etastart, mustart, 373 | method, tol, maxit) 374 | y <- res$y 375 | 376 | res$residuals <- (y - res$fitted.values) / family$mu.eta(res$linear.predictors) 377 | #res$y <- y 378 | 379 | # from summary.glm() 380 | dispersion <- 381 | if(family$family %in% c("poisson", "binomial")) 1 382 | else if(res$df.residual > 0) 383 | { 384 | est.disp <- TRUE 385 | if(any(weights == 0)) 386 | warning("observations with zero weight not used for calculating dispersion") 387 | sum((res$weights*res$residuals ^ 2)[weights > 0]) / res$df.residual 388 | } else 389 | { 390 | est.disp <- TRUE 391 | NaN 392 | } 393 | 394 | res$dispersion <- dispersion 395 | 396 | if (!is.nan(dispersion)) res$se <- res$se * sqrt(dispersion) 397 | 398 | wtdmu <- if (res$intercept) sum(weights * y) / sum(weights) else family$linkinv(offset) 399 | nulldev <- sum(family$dev.resids(y, wtdmu, weights)) 400 | 401 | n.ok <- nobs - sum(weights == 0) 402 | nulldf <- n.ok - as.integer(res$intercept) 403 | res$df.null <- nulldf 404 | 405 | res$null.deviance <- nulldev 406 | 407 | rank <- res$rank 408 | dev <- res$deviance 409 | 410 | aic.model <- aic(y, res$n, res$fitted.values, res$prior.weights, dev) + 2 * rank 411 | 412 | res$aic <- aic.model 413 | 414 | # will change later 415 | boundary <- FALSE 416 | 417 | if (boundary) 418 | { 419 | warning("fit_glm: algorithm stopped at boundary value", call. = FALSE) 420 | } 421 | 422 | 423 | res$call <- match.call() 424 | 425 | class(res) <- "fastglm" 426 | res 427 | } 428 | -------------------------------------------------------------------------------- /R/glm_methods.R: -------------------------------------------------------------------------------- 1 | 2 | #' print method for fastglm objects 3 | #' 4 | #' @param x object to print 5 | #' @param ... not used 6 | #' @rdname print 7 | #' @method print fastglm 8 | #' @export 9 | print.fastglm <- function(x, ...) 10 | { 11 | cat("\nCall:\n") 12 | print(x$call) 13 | cat("\nCoefficients:\n") 14 | print(x$coefficients, digits=5) 15 | } 16 | 17 | logLik.glm <- function (object, ...) 18 | { 19 | if (!missing(...)) 20 | warning("extra arguments discarded") 21 | fam <- family(object)$family 22 | p <- object$rank 23 | if (fam %in% c("gaussian", "Gamma", "inverse.gaussian")) 24 | p <- p + 1 25 | val <- p - object$aic/2 26 | attr(val, "nobs") <- sum(!is.na(object$residuals)) 27 | attr(val, "df") <- p 28 | class(val) <- "logLik" 29 | val 30 | } 31 | 32 | deviance.glm <- function (object, ...) 33 | { 34 | object$deviance 35 | } 36 | 37 | family.glm <- function (object, ...) 38 | { 39 | object$family 40 | } 41 | 42 | #' summary method for fastglm fitted objects 43 | #' 44 | #' @param object fastglm fitted object 45 | #' @param dispersion the dispersion parameter for the family used. 46 | #' Either a single numerical value or \code{NULL} (the default), when it is inferred from \code{object}. 47 | #' @param ... not used 48 | #' @return a summary.fastglm object 49 | #' @rdname summary 50 | #' @method summary fastglm 51 | #' @export 52 | #' @examples 53 | #' 54 | #' x <- matrix(rnorm(10000 * 10), ncol = 10) 55 | #' y <- 1 * (0.25 * x[,1] - 0.25 * x[,3] > rnorm(10000)) 56 | #' 57 | #' fit <- fastglm(x, y, family = binomial()) 58 | #' 59 | #' summary(fit) 60 | #' 61 | #' 62 | summary.fastglm <- function(object, dispersion = NULL, ...) 63 | { 64 | p <- object$rank 65 | 66 | est.disp <- FALSE 67 | df.r <- object$df.residual 68 | 69 | if(is.null(dispersion)) 70 | { 71 | if (!(object$family$family %in% c("poisson", "binomial"))) est.disp <- TRUE 72 | dispersion <- object$dispersion 73 | } 74 | 75 | aliased <- is.na(coef(object)) # used in print method 76 | 77 | if (p > 0) 78 | { 79 | coef <- object$coefficients 80 | se <- object$se 81 | tvalue <- coef / se 82 | 83 | #coef.table <- cbind(Estimate = coef, 84 | # "Std. Error" = se, 85 | # "t value" = tval, 86 | # "Pr(>|t|)" = 2*pt(-abs(tval), df = object$df)) 87 | 88 | dn <- c("Estimate", "Std. Error") 89 | if(!est.disp) 90 | { # known dispersion 91 | pvalue <- 2 * pnorm(-abs(tvalue)) 92 | coef.table <- cbind(coef, se, tvalue, pvalue) 93 | dimnames(coef.table) <- list(names(coef), 94 | c(dn, "z value","Pr(>|z|)")) 95 | } else if(df.r > 0) 96 | { 97 | pvalue <- 2 * pt(-abs(tvalue), df.r) 98 | coef.table <- cbind(coef, se, tvalue, pvalue) 99 | dimnames(coef.table) <- list(names(coef), 100 | c(dn, "t value","Pr(>|t|)")) 101 | } else 102 | { # df.r == 0 103 | coef.table <- cbind(coef, NaN, NaN, NaN) 104 | dimnames(coef.table) <- list(names(coef), 105 | c(dn, "t value","Pr(>|t|)")) 106 | } 107 | 108 | df.f <- length(aliased) 109 | } else 110 | { 111 | coef.table <- matrix(0, 0L, 4L) 112 | dimnames(coef.table) <- 113 | list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)")) 114 | covmat.unscaled <- covmat <- matrix(0, 0L, 0L) 115 | df.f <- length(aliased) 116 | } 117 | df.int <- if (object$intercept) 1L else 0L 118 | 119 | ## these need not all exist, e.g. na.action. 120 | keep <- match(c("call","terms","family","deviance", "aic", 121 | "contrasts", "df.residual","null.deviance","df.null", 122 | "iter", "na.action"), names(object), 0L) 123 | ans <- c(object[keep], 124 | list(deviance.resid = residuals(object, type = "deviance"), 125 | coefficients = coef.table, 126 | aliased = aliased, 127 | dispersion = dispersion, 128 | df = c(object$rank, df.r, df.f))) 129 | #cov.unscaled = covmat.unscaled, 130 | #cov.scaled = covmat)) 131 | 132 | ## will do this later 133 | # if(correlation && p > 0) 134 | # { 135 | # dd <- sqrt(diag(covmat.unscaled)) 136 | # ans$correlation <- 137 | # covmat.unscaled/outer(dd,dd) 138 | # ans$symbolic.cor <- symbolic.cor 139 | # } 140 | class(ans) <- "summary.glm" 141 | return(ans) 142 | } 143 | 144 | 145 | #' residuals method for fastglm fitted objects 146 | #' 147 | #' @param object fastglm fitted object 148 | #' @param type type of residual to be returned 149 | #' @param ... not used 150 | #' @return a vector of residuals 151 | #' @rdname residuals 152 | #' @method residuals fastglm 153 | #' @export 154 | residuals.fastglm <- function(object, 155 | type = c("deviance", "pearson", "working", "response", "partial"), 156 | ...) 157 | { 158 | residuals.glm(object, type, ...) 159 | } 160 | 161 | 162 | 163 | #' logLik method for fastglm fitted objects 164 | #' 165 | #' @param object fastglm fitted object 166 | #' @param ... not used 167 | #' @return Returns an object of class \code{logLik} 168 | #' @rdname logLik 169 | #' @method logLik fastglm 170 | #' @export 171 | logLik.fastglm <- function(object, ...) 172 | { 173 | logLik.glm(object, ...) 174 | } 175 | 176 | 177 | #' deviance method for fastglm fitted objects 178 | #' 179 | #' @param object fastglm fitted object 180 | #' @param ... not used 181 | #' @return The value of the deviance extracted from the object 182 | #' @rdname deviance 183 | #' @method deviance fastglm 184 | #' @export 185 | deviance.fastglm <- function(object, ...) 186 | { 187 | deviance.glm(object, ...) 188 | } 189 | 190 | #' family method for fastglm fitted objects 191 | #' 192 | #' @param object fastglm fitted object 193 | #' @param ... not used 194 | #' @return returns the family of the fitted object 195 | #' @rdname family 196 | #' @method family fastglm 197 | #' @export 198 | family.fastglm <- function(object, ...) 199 | { 200 | family.glm(object, ...) 201 | } 202 | 203 | 204 | #' Obtains predictions and optionally estimates standard errors of those predictions from a fitted generalized linear model object. 205 | #' @param object a fitted object of class inheriting from "\code{fastglm}". 206 | #' @param newdata a matrix to be used for prediction 207 | #' @param type the type of prediction required. The default is on the scale of the linear predictors; 208 | #' the alternative "\code{response}" is on the scale of the response variable. Thus for a default binomial 209 | #' model the default predictions are of log-odds (probabilities on logit scale) and \code{type = "response"} 210 | #' gives the predicted probabilities. The "\code{terms}" option returns a matrix giving the fitted values of each 211 | #' term in the model formula on the linear predictor scale. 212 | #' 213 | #' The value of this argument can be abbreviated. 214 | #' @param se.fit logical switch indicating if standard errors are required. 215 | #' @param dispersion the dispersion of the GLM fit to be assumed in computing the standard errors. 216 | #' If omitted, that returned by \code{summary} applied to the object is used. 217 | #' @param ... further arguments passed to or from other methods. 218 | #' @export 219 | predict.fastglm <- function(object, 220 | newdata = NULL, 221 | type = c("link", "response"), 222 | se.fit = FALSE, 223 | dispersion = NULL, ...) 224 | { 225 | type <- match.arg(type) 226 | 227 | eta <- predict_fastglm_lm(object, newdata, se.fit, scale = 1, ...) 228 | if (type == "response") 229 | { 230 | eta <- family(object)$linkinv(eta) 231 | } 232 | eta 233 | } 234 | 235 | 236 | predict_fastglm_lm <- function(object, newdata, se.fit = FALSE, scale = 1) 237 | { 238 | if (se.fit) 239 | { 240 | stop("confidence/prediction intervals not available yet") 241 | } 242 | dims <- dim(newdata) 243 | if (is.null(dims)) 244 | { 245 | newdata <- as.matrix(newdata) 246 | dims <- dim(newdata) 247 | } 248 | beta <- object$coefficients 249 | 250 | if (dims[2] != length(beta)) 251 | { 252 | stop("newdata provided does not match fitted model 'object'") 253 | } 254 | eta <- drop(newdata %*% beta) 255 | eta 256 | } 257 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r setup, include=FALSE} 8 | library(knitr) 9 | opts_chunk$set(message = FALSE, warning=FALSE) 10 | ``` 11 | 12 | [![version](http://www.r-pkg.org/badges/version/fastglm)](https://cran.r-project.org/package=fastglm) 13 | [![Build Status](https://travis-ci.org/jaredhuling/fastglm.svg?branch=master)](https://travis-ci.org/jaredhuling/fastglm) 14 | 15 | # Overview of 'fastglm' 16 | 17 | The 'fastglm' package is a re-write of `glm()` using `RcppEigen` designed to be computationally efficient and algorithmically stable. 18 | 19 | 20 | 21 | # Installing the 'fastglm' package 22 | 23 | 24 | Install the development version using the **devtools** package: 25 | ```{r, eval = FALSE} 26 | devtools::install_github("jaredhuling/fastglm") 27 | ``` 28 | 29 | or by cloning and building using `R CMD INSTALL` 30 | 31 | # Quick Usage Overview 32 | 33 | Load the package: 34 | ```{r, message = FALSE, warning = FALSE} 35 | library(fastglm) 36 | ``` 37 | 38 | A (not comprehensive) comparison with `glm.fit()` and `speedglm.wfit()`: 39 | 40 | ```{r gen_data, echo = TRUE, out.width= "100%", fig.width = 9, fig.height = 4.5, fig.path="vignettes/"} 41 | library(speedglm) 42 | library(microbenchmark) 43 | library(ggplot2) 44 | 45 | set.seed(123) 46 | n.obs <- 10000 47 | n.vars <- 100 48 | x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) 49 | Sigma <- 0.99 ^ abs(outer(1:n.vars, 1:n.vars, FUN = "-")) 50 | x <- MASS::mvrnorm(n.obs, mu = runif(n.vars, min = -1), Sigma = Sigma) 51 | 52 | y <- 1 * ( drop(x[,1:25] %*% runif(25, min = -0.1, max = 0.10)) > rnorm(n.obs)) 53 | 54 | ct <- microbenchmark( 55 | glm.fit = {gl1 <- glm.fit(x, y, family = binomial())}, 56 | speedglm.eigen = {sg1 <- speedglm.wfit(y, x, intercept = FALSE, 57 | family = binomial())}, 58 | speedglm.chol = {sg2 <- speedglm.wfit(y, x, intercept = FALSE, 59 | family = binomial(), method = "Chol")}, 60 | speedglm.qr = {sg3 <- speedglm.wfit(y, x, intercept = FALSE, 61 | family = binomial(), method = "qr")}, 62 | fastglm.qr.cpiv = {gf1 <- fastglm(x, y, family = binomial())}, 63 | fastglm.qr = {gf2 <- fastglm(x, y, family = binomial(), method = 1)}, 64 | fastglm.LLT = {gf3 <- fastglm(x, y, family = binomial(), method = 2)}, 65 | fastglm.LDLT = {gf4 <- fastglm(x, y, family = binomial(), method = 3)}, 66 | fastglm.qr.fpiv = {gf5 <- fastglm(x, y, family = binomial(), method = 4)}, 67 | times = 25L 68 | ) 69 | 70 | autoplot(ct, log = FALSE) + stat_summary(fun.y = median, geom = 'point', size = 2) 71 | 72 | # comparison of estimates 73 | c(glm_vs_fastglm_qrcpiv = max(abs(coef(gl1) - gf1$coef)), 74 | glm_vs_fastglm_qr = max(abs(coef(gl1) - gf2$coef)), 75 | glm_vs_fastglm_qrfpiv = max(abs(coef(gl1) - gf5$coef)), 76 | glm_vs_fastglm_LLT = max(abs(coef(gl1) - gf3$coef)), 77 | glm_vs_fastglm_LDLT = max(abs(coef(gl1) - gf4$coef))) 78 | 79 | 80 | # now between glm and speedglm 81 | c(glm_vs_speedglm_eigen = max(abs(coef(gl1) - sg1$coef)), 82 | glm_vs_speedglm_Chol = max(abs(coef(gl1) - sg2$coef)), 83 | glm_vs_speedglm_qr = max(abs(coef(gl1) - sg3$coef))) 84 | 85 | ``` 86 | 87 | # Stability 88 | 89 | The `fastglm` package does not compromise computational stability for speed. In fact, for many situations where `glm()` and even `glm2()` do not converge, `fastglm()` does converge. 90 | 91 | As an example, consider the following data scenario, where the response distribution is (mildly) misspecified, but the link function is quite badly misspecified. In such scenarios, the standard IRLS algorithm tends to have convergence issues. The `glm2()` package was designed to handle such cases, however, it still can have convergence issues. The `fastglm()` package uses a similar step-halving technique as `glm2()`, but it starts at better initialized values and thus tends to have better convergence properties in practice. 92 | 93 | ```{r, fig.show='hold'} 94 | set.seed(1) 95 | x <- matrix(rnorm(10000 * 100), ncol = 100) 96 | y <- (exp(0.25 * x[,1] - 0.25 * x[,3] + 0.5 * x[,4] - 0.5 * x[,5] + rnorm(10000)) ) + 0.1 97 | 98 | 99 | system.time(gfit1 <- fastglm(cbind(1, x), y, family = Gamma(link = "sqrt"))) 100 | 101 | system.time(gfit2 <- glm(y~x, family = Gamma(link = "sqrt")) ) 102 | 103 | system.time(gfit3 <- glm2::glm2(y~x, family = Gamma(link = "sqrt")) ) 104 | 105 | system.time(gfit4 <- speedglm(y~x, family = Gamma(link = "sqrt"))) 106 | 107 | ## speedglm appears to diverge 108 | system.time(gfit5 <- speedglm(y~x, family = Gamma(link = "sqrt"), maxit = 500)) 109 | 110 | ## Note that fastglm() returns estimates with the 111 | ## largest likelihood 112 | 113 | c(fastglm = logLik(gfit1), glm = logLik(gfit2), glm2 = logLik(gfit3), 114 | speedglm = logLik(gfit4), speedglm500 = logLik(gfit5)) 115 | 116 | rbind(fastglm = coef(gfit1)[1:5], 117 | glm = coef(gfit2)[1:5], 118 | glm2 = coef(gfit3)[1:5], 119 | speedglm = coef(gfit4)[1:5], 120 | speedglm500 = coef(gfit5)[1:5]) 121 | 122 | ## check convergence of fastglm and #iterations 123 | # 1 means converged, 0 means not converged 124 | c(gfit1$converged, gfit1$iter) 125 | 126 | ## now check convergence for glm() 127 | c(gfit2$converged, gfit2$iter) 128 | 129 | ## check convergence for glm2() 130 | c(gfit3$converged, gfit3$iter) 131 | 132 | ## check convergence for speedglm() 133 | c(gfit4$convergence, gfit4$iter, gfit5$convergence, gfit5$iter) 134 | 135 | ## increasing number of IRLS iterations for glm() does not help that much 136 | system.time(gfit2 <- glm(y~x, family = Gamma(link = "sqrt"), control = list(maxit = 1000)) ) 137 | 138 | gfit2$converged 139 | gfit2$iter 140 | 141 | logLik(gfit1) 142 | logLik(gfit2) 143 | 144 | ``` 145 | 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![version](http://www.r-pkg.org/badges/version/fastglm)](https://cran.r-project.org/package=fastglm) 3 | [![Build 4 | Status](https://travis-ci.org/jaredhuling/fastglm.svg?branch=master)](https://travis-ci.org/jaredhuling/fastglm) 5 | 6 | # Overview of ‘fastglm’ 7 | 8 | The ‘fastglm’ package is a re-write of `glm()` using `RcppEigen` 9 | designed to be computationally efficient and algorithmically stable. 10 | 11 | # Installing the ‘fastglm’ package 12 | 13 | Install the development version using the **devtools** package: 14 | 15 | ``` r 16 | devtools::install_github("jaredhuling/fastglm") 17 | ``` 18 | 19 | or by cloning and building using `R CMD INSTALL` 20 | 21 | # Quick Usage Overview 22 | 23 | Load the package: 24 | 25 | ``` r 26 | library(fastglm) 27 | ``` 28 | 29 | A (not comprehensive) comparison with `glm.fit()` and `speedglm.wfit()`: 30 | 31 | ``` r 32 | library(speedglm) 33 | library(microbenchmark) 34 | library(ggplot2) 35 | 36 | set.seed(123) 37 | n.obs <- 10000 38 | n.vars <- 100 39 | x <- matrix(rnorm(n.obs * n.vars, sd = 3), n.obs, n.vars) 40 | Sigma <- 0.99 ^ abs(outer(1:n.vars, 1:n.vars, FUN = "-")) 41 | x <- MASS::mvrnorm(n.obs, mu = runif(n.vars, min = -1), Sigma = Sigma) 42 | 43 | y <- 1 * ( drop(x[,1:25] %*% runif(25, min = -0.1, max = 0.10)) > rnorm(n.obs)) 44 | 45 | ct <- microbenchmark( 46 | glm.fit = {gl1 <- glm.fit(x, y, family = binomial())}, 47 | speedglm.eigen = {sg1 <- speedglm.wfit(y, x, intercept = FALSE, 48 | family = binomial())}, 49 | speedglm.chol = {sg2 <- speedglm.wfit(y, x, intercept = FALSE, 50 | family = binomial(), method = "Chol")}, 51 | speedglm.qr = {sg3 <- speedglm.wfit(y, x, intercept = FALSE, 52 | family = binomial(), method = "qr")}, 53 | fastglm.qr.cpiv = {gf1 <- fastglm(x, y, family = binomial())}, 54 | fastglm.qr = {gf2 <- fastglm(x, y, family = binomial(), method = 1)}, 55 | fastglm.LLT = {gf3 <- fastglm(x, y, family = binomial(), method = 2)}, 56 | fastglm.LDLT = {gf4 <- fastglm(x, y, family = binomial(), method = 3)}, 57 | fastglm.qr.fpiv = {gf5 <- fastglm(x, y, family = binomial(), method = 4)}, 58 | times = 25L 59 | ) 60 | 61 | autoplot(ct, log = FALSE) + stat_summary(fun.y = median, geom = 'point', size = 2) 62 | ``` 63 | 64 | 65 | 66 | ``` r 67 | # comparison of estimates 68 | c(glm_vs_fastglm_qrcpiv = max(abs(coef(gl1) - gf1$coef)), 69 | glm_vs_fastglm_qr = max(abs(coef(gl1) - gf2$coef)), 70 | glm_vs_fastglm_qrfpiv = max(abs(coef(gl1) - gf5$coef)), 71 | glm_vs_fastglm_LLT = max(abs(coef(gl1) - gf3$coef)), 72 | glm_vs_fastglm_LDLT = max(abs(coef(gl1) - gf4$coef))) 73 | ``` 74 | 75 | ## glm_vs_fastglm_qrcpiv glm_vs_fastglm_qr glm_vs_fastglm_qrfpiv 76 | ## 2.590289e-14 2.546921e-14 2.776945e-14 77 | ## glm_vs_fastglm_LLT glm_vs_fastglm_LDLT 78 | ## 1.140078e-13 1.094264e-13 79 | 80 | ``` r 81 | # now between glm and speedglm 82 | c(glm_vs_speedglm_eigen = max(abs(coef(gl1) - sg1$coef)), 83 | glm_vs_speedglm_Chol = max(abs(coef(gl1) - sg2$coef)), 84 | glm_vs_speedglm_qr = max(abs(coef(gl1) - sg3$coef))) 85 | ``` 86 | 87 | ## glm_vs_speedglm_eigen glm_vs_speedglm_Chol glm_vs_speedglm_qr 88 | ## 1.359413e-12 1.359413e-12 1.191977e-12 89 | 90 | # Stability 91 | 92 | The `fastglm` package does not compromise computational stability for 93 | speed. In fact, for many situations where `glm()` and even `glm2()` do 94 | not converge, `fastglm()` does converge. 95 | 96 | As an example, consider the following data scenario, where the response 97 | distribution is (mildly) misspecified, but the link function is quite 98 | badly misspecified. In such scenarios, the standard IRLS algorithm tends 99 | to have convergence issues. The `glm2()` package was designed to handle 100 | such cases, however, it still can have convergence issues. The 101 | `fastglm()` package uses a similar step-halving technique as `glm2()`, 102 | but it starts at better initialized values and thus tends to have better 103 | convergence properties in practice. 104 | 105 | ``` r 106 | set.seed(1) 107 | x <- matrix(rnorm(10000 * 100), ncol = 100) 108 | y <- (exp(0.25 * x[,1] - 0.25 * x[,3] + 0.5 * x[,4] - 0.5 * x[,5] + rnorm(10000)) ) + 0.1 109 | 110 | 111 | system.time(gfit1 <- fastglm(cbind(1, x), y, family = Gamma(link = "sqrt"))) 112 | ``` 113 | 114 | ## user system elapsed 115 | ## 0.783 0.022 0.807 116 | 117 | ``` r 118 | system.time(gfit2 <- glm(y~x, family = Gamma(link = "sqrt")) ) 119 | ``` 120 | 121 | ## user system elapsed 122 | ## 3.035 0.121 3.166 123 | 124 | ``` r 125 | system.time(gfit3 <- glm2::glm2(y~x, family = Gamma(link = "sqrt")) ) 126 | ``` 127 | 128 | ## user system elapsed 129 | ## 2.117 0.084 2.205 130 | 131 | ``` r 132 | system.time(gfit4 <- speedglm(y~x, family = Gamma(link = "sqrt"))) 133 | ``` 134 | 135 | ## user system elapsed 136 | ## 1.757 0.048 1.807 137 | 138 | ``` r 139 | ## speedglm appears to diverge 140 | system.time(gfit5 <- speedglm(y~x, family = Gamma(link = "sqrt"), maxit = 500)) 141 | ``` 142 | 143 | ## user system elapsed 144 | ## 37.776 1.205 39.272 145 | 146 | ``` r 147 | ## Note that fastglm() returns estimates with the 148 | ## largest likelihood 149 | 150 | c(fastglm = logLik(gfit1), glm = logLik(gfit2), glm2 = logLik(gfit3), 151 | speedglm = logLik(gfit4), speedglm500 = logLik(gfit5)) 152 | ``` 153 | 154 | ## fastglm glm glm2 speedglm speedglm500 155 | ## -16030.81 -16704.05 -16046.66 -47722.66 -57785.72 156 | 157 | ``` r 158 | rbind(fastglm = coef(gfit1)[1:5], 159 | glm = coef(gfit2)[1:5], 160 | glm2 = coef(gfit3)[1:5], 161 | speedglm = coef(gfit4)[1:5], 162 | speedglm500 = coef(gfit5)[1:5]) 163 | ``` 164 | 165 | ## (Intercept) X1 X2 X3 X4 166 | ## fastglm 1.429256 0.1258736 5.321164e-03 -0.1293897 0.2389373 167 | ## glm 1.431168 0.1251936 -6.896739e-05 -0.1281857 0.2366473 168 | ## glm2 1.426864 0.1242616 -9.860241e-05 -0.1254873 0.2361301 169 | ## speedglm -22.182477 3.1784570 -2.970111e+00 -4.9709797 14.0549438 170 | ## speedglm500 -27.891929 -13.9080256 -9.690833e+00 2.7279219 -11.1458325 171 | 172 | ``` r 173 | ## check convergence of fastglm and #iterations 174 | # 1 means converged, 0 means not converged 175 | c(gfit1$converged, gfit1$iter) 176 | ``` 177 | 178 | ## [1] 1 17 179 | 180 | ``` r 181 | ## now check convergence for glm() 182 | c(gfit2$converged, gfit2$iter) 183 | ``` 184 | 185 | ## [1] 0 25 186 | 187 | ``` r 188 | ## check convergence for glm2() 189 | c(gfit3$converged, gfit3$iter) 190 | ``` 191 | 192 | ## [1] 1 19 193 | 194 | ``` r 195 | ## check convergence for speedglm() 196 | c(gfit4$convergence, gfit4$iter, gfit5$convergence, gfit5$iter) 197 | ``` 198 | 199 | ## [1] 0 25 0 500 200 | 201 | ``` r 202 | ## increasing number of IRLS iterations for glm() does not help that much 203 | system.time(gfit2 <- glm(y~x, family = Gamma(link = "sqrt"), control = list(maxit = 1000)) ) 204 | ``` 205 | 206 | ## user system elapsed 207 | ## 116.122 4.148 120.833 208 | 209 | ``` r 210 | gfit2$converged 211 | ``` 212 | 213 | ## [1] FALSE 214 | 215 | ``` r 216 | gfit2$iter 217 | ``` 218 | 219 | ## [1] 1000 220 | 221 | ``` r 222 | logLik(gfit1) 223 | ``` 224 | 225 | ## 'log Lik.' -16030.81 (df=102) 226 | 227 | ``` r 228 | logLik(gfit2) 229 | ``` 230 | 231 | ## 'log Lik.' -16333.99 (df=102) 232 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaredhuling/fastglm/9a04daa4a99761fee4fc87ecdb100a530f96b161/_pkgdown.yml -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | 2 | ## First CRAN submission for 'fastglm' 3 | 4 | * Moves c++ header files to inst/install so source c++ code can be used externally 5 | 6 | ## Test environments 7 | 8 | * Debian Linux - (Debian 11.2.0-20) 11.2.0 - (2022-05-20 r82390) 9 | * local Mac OSX Mojave (R 4.1.1) 10 | 11 | ## R CMD check results 12 | 13 | 14 | ── R CMD check results ────────────────────────────────────── fastglm 0.0.1 ──── 15 | Duration: 1m 37.7s 16 | 17 | 0 errors | 0 warnings | 0 notes 18 | 19 | R CMD check succeeded 20 | 21 | -------------------------------------------------------------------------------- /docs/articles/Using-the-fastglm-Package.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Using the fastglm Package • fastglm 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 |
22 |
69 | 70 | 71 | 72 |
73 |
74 | 84 | 85 | 86 | 87 |

Structure for the fastglm vignette.

88 |
89 |

90 | Overview

91 |

The ‘fastglm’ package is intended to be a fast (and computationally stable) reimplementation of the glm() function. The source code is based in ‘C++’ using ‘RcppEigen’.

92 |
93 |
94 | 95 | 104 | 105 |
106 | 107 | 108 |
111 | 112 |
113 |

Site built with pkgdown.

114 |
115 | 116 |
117 |
118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
50 |
51 | 97 | 98 | 99 |
100 | 101 |
102 |
103 | 106 | 107 |
108 |

All vignettes

109 |

110 | 111 | 114 |
115 |
116 |
117 | 118 |
119 | 122 | 123 |
124 |

Site built with pkgdown.

125 |
126 | 127 |
128 |
129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
50 |
51 | 97 | 98 | 99 |
100 | 101 |
102 |
103 | 106 | 107 |
    108 |
  • 109 |

    Jared Huling. Author, maintainer. 110 |

    111 |
  • 112 |
  • 113 |

    Douglas Bates. Copyright holder. 114 |

    115 |
  • 116 |
  • 117 |

    Dirk Eddelbuettel. Copyright holder. 118 |

    119 |
  • 120 |
  • 121 |

    Romain Francois. Copyright holder. 122 |

    123 |
  • 124 |
  • 125 |

    Yixuan Qiu. Copyright holder. 126 |

    127 |
  • 128 |
129 | 130 |
131 | 132 |
133 | 134 | 135 |
136 | 139 | 140 |
141 |

Site built with pkgdown.

142 |
143 | 144 |
145 |
146 | 147 | 148 | 149 | 150 | 151 | 152 | -------------------------------------------------------------------------------- /docs/docsearch.css: -------------------------------------------------------------------------------- 1 | /* Docsearch -------------------------------------------------------------- */ 2 | /* 3 | Source: https://github.com/algolia/docsearch/ 4 | License: MIT 5 | */ 6 | 7 | .algolia-autocomplete { 8 | display: block; 9 | -webkit-box-flex: 1; 10 | -ms-flex: 1; 11 | flex: 1 12 | } 13 | 14 | .algolia-autocomplete .ds-dropdown-menu { 15 | width: 100%; 16 | min-width: none; 17 | max-width: none; 18 | padding: .75rem 0; 19 | background-color: #fff; 20 | background-clip: padding-box; 21 | border: 1px solid rgba(0, 0, 0, .1); 22 | box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); 23 | } 24 | 25 | @media (min-width:768px) { 26 | .algolia-autocomplete .ds-dropdown-menu { 27 | width: 175% 28 | } 29 | } 30 | 31 | .algolia-autocomplete .ds-dropdown-menu::before { 32 | display: none 33 | } 34 | 35 | .algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { 36 | padding: 0; 37 | background-color: rgb(255,255,255); 38 | border: 0; 39 | max-height: 80vh; 40 | } 41 | 42 | .algolia-autocomplete .ds-dropdown-menu .ds-suggestions { 43 | margin-top: 0 44 | } 45 | 46 | .algolia-autocomplete .algolia-docsearch-suggestion { 47 | padding: 0; 48 | overflow: visible 49 | } 50 | 51 | .algolia-autocomplete .algolia-docsearch-suggestion--category-header { 52 | padding: .125rem 1rem; 53 | margin-top: 0; 54 | font-size: 1.3em; 55 | font-weight: 500; 56 | color: #00008B; 57 | border-bottom: 0 58 | } 59 | 60 | .algolia-autocomplete .algolia-docsearch-suggestion--wrapper { 61 | float: none; 62 | padding-top: 0 63 | } 64 | 65 | .algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { 66 | float: none; 67 | width: auto; 68 | padding: 0; 69 | text-align: left 70 | } 71 | 72 | .algolia-autocomplete .algolia-docsearch-suggestion--content { 73 | float: none; 74 | width: auto; 75 | padding: 0 76 | } 77 | 78 | .algolia-autocomplete .algolia-docsearch-suggestion--content::before { 79 | display: none 80 | } 81 | 82 | .algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { 83 | padding-top: .75rem; 84 | margin-top: .75rem; 85 | border-top: 1px solid rgba(0, 0, 0, .1) 86 | } 87 | 88 | .algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { 89 | display: block; 90 | padding: .1rem 1rem; 91 | margin-bottom: 0.1; 92 | font-size: 1.0em; 93 | font-weight: 400 94 | /* display: none */ 95 | } 96 | 97 | .algolia-autocomplete .algolia-docsearch-suggestion--title { 98 | display: block; 99 | padding: .25rem 1rem; 100 | margin-bottom: 0; 101 | font-size: 0.9em; 102 | font-weight: 400 103 | } 104 | 105 | .algolia-autocomplete .algolia-docsearch-suggestion--text { 106 | padding: 0 1rem .5rem; 107 | margin-top: -.25rem; 108 | font-size: 0.8em; 109 | font-weight: 400; 110 | line-height: 1.25 111 | } 112 | 113 | .algolia-autocomplete .algolia-docsearch-footer { 114 | width: 110px; 115 | height: 20px; 116 | z-index: 3; 117 | margin-top: 10.66667px; 118 | float: right; 119 | font-size: 0; 120 | line-height: 0; 121 | } 122 | 123 | .algolia-autocomplete .algolia-docsearch-footer--logo { 124 | background-image: url("data:image/svg+xml;utf8,"); 125 | background-repeat: no-repeat; 126 | background-position: 50%; 127 | background-size: 100%; 128 | overflow: hidden; 129 | text-indent: -9000px; 130 | width: 100%; 131 | height: 100%; 132 | display: block; 133 | transform: translate(-8px); 134 | } 135 | 136 | .algolia-autocomplete .algolia-docsearch-suggestion--highlight { 137 | color: #FF8C00; 138 | background: rgba(232, 189, 54, 0.1) 139 | } 140 | 141 | 142 | .algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { 143 | box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) 144 | } 145 | 146 | .algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { 147 | background-color: rgba(192, 192, 192, .15) 148 | } 149 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | 25 | padding-top: 60px; 26 | } 27 | 28 | body > .container .row { 29 | flex: 1 0 auto; 30 | } 31 | 32 | footer { 33 | margin-top: 45px; 34 | padding: 35px 0 36px; 35 | border-top: 1px solid #e5e5e5; 36 | color: #666; 37 | display: flex; 38 | flex-shrink: 0; 39 | } 40 | footer p { 41 | margin-bottom: 0; 42 | } 43 | footer div { 44 | flex: 1; 45 | } 46 | footer .pkgdown { 47 | text-align: right; 48 | } 49 | footer p { 50 | margin-bottom: 0; 51 | } 52 | 53 | img.icon { 54 | float: right; 55 | } 56 | 57 | img { 58 | max-width: 100%; 59 | } 60 | 61 | /* Typographic tweaking ---------------------------------*/ 62 | 63 | .contents h1.page-header { 64 | margin-top: calc(-60px + 1em); 65 | } 66 | 67 | /* Section anchors ---------------------------------*/ 68 | 69 | a.anchor { 70 | margin-left: -30px; 71 | display:inline-block; 72 | width: 30px; 73 | height: 30px; 74 | visibility: hidden; 75 | 76 | background-image: url(./link.svg); 77 | background-repeat: no-repeat; 78 | background-size: 20px 20px; 79 | background-position: center center; 80 | } 81 | 82 | .hasAnchor:hover a.anchor { 83 | visibility: visible; 84 | } 85 | 86 | @media (max-width: 767px) { 87 | .hasAnchor:hover a.anchor { 88 | visibility: hidden; 89 | } 90 | } 91 | 92 | 93 | /* Fixes for fixed navbar --------------------------*/ 94 | 95 | .contents h1, .contents h2, .contents h3, .contents h4 { 96 | padding-top: 60px; 97 | margin-top: -40px; 98 | } 99 | 100 | /* Static header placement on mobile devices */ 101 | @media (max-width: 767px) { 102 | .navbar-fixed-top { 103 | position: absolute; 104 | } 105 | .navbar { 106 | padding: 0; 107 | } 108 | } 109 | 110 | 111 | /* Sidebar --------------------------*/ 112 | 113 | #sidebar { 114 | margin-top: 30px; 115 | } 116 | #sidebar h2 { 117 | font-size: 1.5em; 118 | margin-top: 1em; 119 | } 120 | 121 | #sidebar h2:first-child { 122 | margin-top: 0; 123 | } 124 | 125 | #sidebar .list-unstyled li { 126 | margin-bottom: 0.5em; 127 | } 128 | 129 | .orcid { 130 | height: 16px; 131 | vertical-align: middle; 132 | } 133 | 134 | /* Reference index & topics ----------------------------------------------- */ 135 | 136 | .ref-index th {font-weight: normal;} 137 | 138 | .ref-index td {vertical-align: top;} 139 | .ref-index .alias {width: 40%;} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-index .alias {width: 40%;} 143 | .ref-index .title {width: 60%;} 144 | 145 | .ref-arguments th {text-align: right; padding-right: 10px;} 146 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 147 | .ref-arguments .name {width: 20%;} 148 | .ref-arguments .desc {width: 80%;} 149 | 150 | /* Nice scrolling for wide elements --------------------------------------- */ 151 | 152 | table { 153 | display: block; 154 | overflow: auto; 155 | } 156 | 157 | /* Syntax highlighting ---------------------------------------------------- */ 158 | 159 | pre { 160 | word-wrap: normal; 161 | word-break: normal; 162 | border: 1px solid #eee; 163 | } 164 | 165 | pre, code { 166 | background-color: #f8f8f8; 167 | color: #333; 168 | } 169 | 170 | pre code { 171 | overflow: auto; 172 | word-wrap: normal; 173 | white-space: pre; 174 | } 175 | 176 | pre .img { 177 | margin: 5px 0; 178 | } 179 | 180 | pre .img img { 181 | background-color: #fff; 182 | display: block; 183 | height: auto; 184 | } 185 | 186 | code a, pre a { 187 | color: #375f84; 188 | } 189 | 190 | a.sourceLine:hover { 191 | text-decoration: none; 192 | } 193 | 194 | .fl {color: #1514b5;} 195 | .fu {color: #000000;} /* function */ 196 | .ch,.st {color: #036a07;} /* string */ 197 | .kw {color: #264D66;} /* keyword */ 198 | .co {color: #888888;} /* comment */ 199 | 200 | .message { color: black; font-weight: bolder;} 201 | .error { color: orange; font-weight: bolder;} 202 | .warning { color: #6A0366; font-weight: bolder;} 203 | 204 | /* Clipboard --------------------------*/ 205 | 206 | .hasCopyButton { 207 | position: relative; 208 | } 209 | 210 | .btn-copy-ex { 211 | position: absolute; 212 | right: 0; 213 | top: 0; 214 | visibility: hidden; 215 | } 216 | 217 | .hasCopyButton:hover button.btn-copy-ex { 218 | visibility: visible; 219 | } 220 | 221 | /* mark.js ----------------------------*/ 222 | 223 | mark { 224 | background-color: rgba(255, 255, 51, 0.5); 225 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 226 | padding: 1px; 227 | } 228 | 229 | /* vertical spacing after htmlwidgets */ 230 | .html-widget { 231 | margin-bottom: 10px; 232 | } 233 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $("#sidebar") 6 | .stick_in_parent({offset_top: 40}) 7 | .on('sticky_kit:bottom', function(e) { 8 | $(this).parent().css('position', 'static'); 9 | }) 10 | .on('sticky_kit:unbottom', function(e) { 11 | $(this).parent().css('position', 'relative'); 12 | }); 13 | 14 | $('body').scrollspy({ 15 | target: '#sidebar', 16 | offset: 60 17 | }); 18 | 19 | $('[data-toggle="tooltip"]').tooltip(); 20 | 21 | var cur_path = paths(location.pathname); 22 | var links = $("#navbar ul li a"); 23 | var max_length = -1; 24 | var pos = -1; 25 | for (var i = 0; i < links.length; i++) { 26 | if (links[i].getAttribute("href") === "#") 27 | continue; 28 | var path = paths(links[i].pathname); 29 | 30 | var length = prefix_length(cur_path, path); 31 | if (length > max_length) { 32 | max_length = length; 33 | pos = i; 34 | } 35 | } 36 | 37 | // Add class to parent
  • , and enclosing
  • if in dropdown 38 | if (pos >= 0) { 39 | var menu_anchor = $(links[pos]); 40 | menu_anchor.parent().addClass("active"); 41 | menu_anchor.closest("li.dropdown").addClass("active"); 42 | } 43 | }); 44 | 45 | function paths(pathname) { 46 | var pieces = pathname.split("/"); 47 | pieces.shift(); // always starts with / 48 | 49 | var end = pieces[pieces.length - 1]; 50 | if (end === "index.html" || end === "") 51 | pieces.pop(); 52 | return(pieces); 53 | } 54 | 55 | function prefix_length(needle, haystack) { 56 | if (needle.length > haystack.length) 57 | return(0); 58 | 59 | // Special case for length-0 haystack, since for loop won't run 60 | if (haystack.length === 0) { 61 | return(needle.length === 0 ? 1 : 0); 62 | } 63 | 64 | for (var i = 0; i < haystack.length; i++) { 65 | if (needle[i] != haystack[i]) 66 | return(i); 67 | } 68 | 69 | return(haystack.length); 70 | } 71 | 72 | /* Clipboard --------------------------*/ 73 | 74 | function changeTooltipMessage(element, msg) { 75 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 76 | element.setAttribute('data-original-title', msg); 77 | $(element).tooltip('show'); 78 | element.setAttribute('data-original-title', tooltipOriginalTitle); 79 | } 80 | 81 | if(Clipboard.isSupported()) { 82 | $(document).ready(function() { 83 | var copyButton = ""; 84 | 85 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 86 | 87 | // Insert copy buttons: 88 | $(copyButton).prependTo(".hasCopyButton"); 89 | 90 | // Initialize tooltips: 91 | $('.btn-copy-ex').tooltip({container: 'body'}); 92 | 93 | // Initialize clipboard: 94 | var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { 95 | text: function(trigger) { 96 | return trigger.parentNode.textContent; 97 | } 98 | }); 99 | 100 | clipboardBtnCopies.on('success', function(e) { 101 | changeTooltipMessage(e.trigger, 'Copied!'); 102 | e.clearSelection(); 103 | }); 104 | 105 | clipboardBtnCopies.on('error', function() { 106 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 107 | }); 108 | }); 109 | } 110 | })(window.jQuery || window.$) 111 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.2.3.2 2 | pkgdown: 1.1.0.9000 3 | pkgdown_sha: b2798656f04ce282edea3e12734ba5588c6acd6e 4 | articles: 5 | quick-usage-guide-to-the-fastglm-package: quick-usage-guide-to-the-fastglm-package.html 6 | 7 | -------------------------------------------------------------------------------- /docs/reference/deviance.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | deviance method for fastglm fitted objects — deviance.fastglm • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 100 | 101 | 102 |
    103 | 104 |
    105 |
    106 | 111 | 112 |
    113 | 114 |

    deviance method for fastglm fitted objects

    115 | 116 |
    117 | 118 |
    # S3 method for fastglm
    119 | deviance(object, ...)
    120 | 121 |

    Arguments

    122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 |
    object

    fastglm fitted object

    ...

    not used

    133 | 134 |

    Value

    135 | 136 |

    The value of the deviance extracted from the object

    137 | 138 | 139 |
    140 | 149 |
    150 | 151 |
    152 | 155 | 156 |
    157 |

    Site built with pkgdown.

    158 |
    159 | 160 |
    161 |
    162 | 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /docs/reference/family.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | family method for fastglm fitted objects — family.fastglm • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 100 | 101 | 102 |
    103 | 104 |
    105 |
    106 | 111 | 112 |
    113 | 114 |

    family method for fastglm fitted objects

    115 | 116 |
    117 | 118 |
    # S3 method for fastglm
    119 | family(object, ...)
    120 | 121 |

    Arguments

    122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 |
    object

    fastglm fitted object

    ...

    not used

    133 | 134 |

    Value

    135 | 136 |

    returns the family of the fitted object

    137 | 138 | 139 |
    140 | 149 |
    150 | 151 |
    152 | 155 | 156 |
    157 |

    Site built with pkgdown.

    158 |
    159 | 160 |
    161 |
    162 | 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /docs/reference/fastglm.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | fast generalized linear model fitting — fastglm • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 48 | 49 | 50 | 51 | 52 | 53 |
    54 |
    55 | 101 | 102 | 103 |
    104 | 105 |
    106 |
    107 | 112 | 113 |
    114 | 115 |

    fast generalized linear model fitting

    116 |

    bigLm default

    117 | 118 |
    119 | 120 |
    fastglm(x, ...)
    121 | 
    122 | # S3 method for default
    123 | fastglm(x, y, family = gaussian(), weights = NULL,
    124 |   offset = NULL, start = NULL, etastart = NULL, mustart = NULL,
    125 |   method = 0L, tol = 1e-08, maxit = 100L, ...)
    126 | 127 |

    Arguments

    128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 |
    x

    input model matrix. Must be a matrix object

    ...

    not used

    y

    numeric response vector of length nobs.

    family

    a description of the error distribution and link function to be used in the model. 145 | For fastglm this can be a character string naming a family function, a family function or the 146 | result of a call to a family function. For fastglmPure only the third option is supported. 147 | (See family for details of family functions.)

    weights

    an optional vector of 'prior weights' to be used in the fitting process. Should be a numeric vector.

    offset

    this can be used to specify an a priori known component to be included in the linear predictor during fitting. 156 | This should be a numeric vector of length equal to the number of cases

    start

    starting values for the parameters in the linear predictor.

    etastart

    starting values for the linear predictor.

    mustart

    values for the vector of means.

    method

    an integer scalar with value 0 for the column-pivoted QR decomposition, 1 for the unpivoted QR decomposition, 173 | 2 for the LLT Cholesky, or 3 for the LDLT Cholesky

    tol

    threshold tolerance for convergence. Should be a positive real number

    maxit

    maximum number of IRLS iterations. Should be an integer

    184 | 185 |

    Value

    186 | 187 |

    A list with the elements

    188 |
    coefficients

    a vector of coefficients

    189 |
    se

    a vector of the standard errors of the coefficient estimates

    190 |
    rank

    a scalar denoting the computed rank of the model matrix

    191 |
    df.residual

    a scalar denoting the degrees of freedom in the model

    192 |
    residuals

    the vector of residuals

    193 |
    s

    a numeric scalar - the root mean square for residuals

    194 |
    fitted.values

    the vector of fitted values

    195 | 196 | 197 | 198 |

    Examples

    199 |
    200 | x <- matrix(rnorm(10000 * 100), ncol = 100) 201 | y <- 1 * (0.25 * x[,1] - 0.25 * x[,3] > rnorm(10000)) 202 | 203 | system.time(gl1 <- glm.fit(x, y, family = binomial()))
    #> user system elapsed 204 | #> 0.452 0.019 0.474
    205 | system.time(gf1 <- fastglm(x, y, family = binomial()))
    #> user system elapsed 206 | #> 0.210 0.006 0.223
    207 | system.time(gf2 <- fastglm(x, y, family = binomial(), method = 1))
    #> user system elapsed 208 | #> 0.250 0.005 0.265
    209 | system.time(gf3 <- fastglm(x, y, family = binomial(), method = 2))
    #> user system elapsed 210 | #> 0.065 0.009 0.074
    211 | system.time(gf4 <- fastglm(x, y, family = binomial(), method = 3))
    #> user system elapsed 212 | #> 0.065 0.008 0.073
    213 | max(abs(coef(gl1) - gf1$coef))
    #> [1] 1.054712e-15
    max(abs(coef(gl1) - gf2$coef))
    #> [1] 1.609823e-15
    max(abs(coef(gl1) - gf3$coef))
    #> [1] 1.165734e-15
    max(abs(coef(gl1) - gf4$coef))
    #> [1] 9.992007e-16
    214 | 215 |
    216 |
    217 | 228 |
    229 | 230 |
    231 | 234 | 235 |
    236 |

    Site built with pkgdown.

    237 |
    238 | 239 |
    240 |
    241 | 242 | 243 | 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /docs/reference/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Function reference • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
    50 |
    51 | 97 | 98 | 99 |
    100 | 101 |
    102 |
    103 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 121 | 122 | 123 | 124 | 127 | 128 | 129 | 130 | 133 | 134 | 135 | 136 | 139 | 140 | 141 | 142 | 145 | 146 | 147 | 148 | 151 | 152 | 153 | 154 | 157 | 158 | 159 | 160 | 163 | 164 | 165 | 166 | 169 | 170 | 171 | 172 | 175 | 176 | 177 | 178 |
    118 |

    All functions

    119 |

    120 |
    125 |

    deviance(<fastglm>)

    126 |

    deviance method for fastglm fitted objects

    131 |

    family(<fastglm>)

    132 |

    family method for fastglm fitted objects

    137 |

    fastglm()

    138 |

    fast generalized linear model fitting

    143 |

    fastglmPure()

    144 |

    fast generalized linear model fitting

    149 |

    logLik(<fastglm>)

    150 |

    logLik method for fastglm fitted objects

    155 |

    predict(<fastglm>)

    156 |

    Obtains predictions and optionally estimates standard errors of those predictions from a fitted generalized linear model object.

    161 |

    print(<fastglm>)

    162 |

    print method for fastglm objects

    167 |

    residuals(<fastglm>)

    168 |

    residuals method for fastglm fitted objects

    173 |

    summary(<fastglm>)

    174 |

    summary method for fastglm fitted objects

    179 |
    180 | 181 | 187 |
    188 | 189 |
    190 | 193 | 194 |
    195 |

    Site built with pkgdown.

    196 |
    197 | 198 |
    199 |
    200 | 201 | 202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /docs/reference/logLik.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | logLik method for fastglm fitted objects — logLik.fastglm • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 100 | 101 | 102 |
    103 | 104 |
    105 |
    106 | 111 | 112 |
    113 | 114 |

    logLik method for fastglm fitted objects

    115 | 116 |
    117 | 118 |
    # S3 method for fastglm
    119 | logLik(object, ...)
    120 | 121 |

    Arguments

    122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 |
    object

    fastglm fitted object

    ...

    not used

    133 | 134 |

    Value

    135 | 136 |

    Returns an object of class logLik

    137 | 138 | 139 |
    140 | 149 |
    150 | 151 |
    152 | 155 | 156 |
    157 |

    Site built with pkgdown.

    158 |
    159 | 160 |
    161 |
    162 | 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /docs/reference/predict.fastglm.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Obtains predictions and optionally estimates standard errors of those predictions from a fitted generalized linear model object. — predict.fastglm • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 100 | 101 | 102 |
    103 | 104 |
    105 |
    106 | 111 | 112 |
    113 | 114 |

    Obtains predictions and optionally estimates standard errors of those predictions from a fitted generalized linear model object.

    115 | 116 |
    117 | 118 |
    # S3 method for fastglm
    119 | predict(object, newdata = NULL, type = c("link",
    120 |   "response"), se.fit = FALSE, dispersion = NULL, ...)
    121 | 122 |

    Arguments

    123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 150 | 151 | 152 | 153 | 154 | 155 |
    object

    a fitted object of class inheriting from "fastglm".

    newdata

    a matrix to be used for prediction

    type

    the type of prediction required. The default is on the scale of the linear predictors; 136 | the alternative "response" is on the scale of the response variable. Thus for a default binomial 137 | model the default predictions are of log-odds (probabilities on logit scale) and type = "response" 138 | gives the predicted probabilities. The "terms" option returns a matrix giving the fitted values of each 139 | term in the model formula on the linear predictor scale.

    140 |

    The value of this argument can be abbreviated.

    se.fit

    logical switch indicating if standard errors are required.

    dispersion

    the dispersion of the GLM fit to be assumed in computing the standard errors. 149 | If omitted, that returned by summary applied to the object is used.

    ...

    further arguments passed to or from other methods.

    156 | 157 | 158 |
    159 | 166 |
    167 | 168 |
    169 | 172 | 173 |
    174 |

    Site built with pkgdown.

    175 |
    176 | 177 |
    178 |
    179 | 180 | 181 | 182 | 183 | 184 | 185 | -------------------------------------------------------------------------------- /docs/reference/print.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | print method for fastglm objects — print.fastglm • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 100 | 101 | 102 |
    103 | 104 |
    105 |
    106 | 111 | 112 |
    113 | 114 |

    print method for fastglm objects

    115 | 116 |
    117 | 118 |
    # S3 method for fastglm
    119 | print(x, ...)
    120 | 121 |

    Arguments

    122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 |
    x

    object to print

    ...

    not used

    133 | 134 | 135 |
    136 | 143 |
    144 | 145 |
    146 | 149 | 150 |
    151 |

    Site built with pkgdown.

    152 |
    153 | 154 |
    155 |
    156 | 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /docs/reference/residuals.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | residuals method for fastglm fitted objects — residuals.fastglm • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 100 | 101 | 102 |
    103 | 104 |
    105 |
    106 | 111 | 112 |
    113 | 114 |

    residuals method for fastglm fitted objects

    115 | 116 |
    117 | 118 |
    # S3 method for fastglm
    119 | residuals(object, type = c("deviance", "pearson",
    120 |   "working", "response", "partial"), ...)
    121 | 122 |

    Arguments

    123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 |
    object

    fastglm fitted object

    type

    type of residual to be returned

    ...

    not used

    138 | 139 |

    Value

    140 | 141 |

    a vector of residuals

    142 | 143 | 144 |
    145 | 154 |
    155 | 156 |
    157 | 160 | 161 |
    162 |

    Site built with pkgdown.

    163 |
    164 | 165 |
    166 |
    167 | 168 | 169 | 170 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /docs/reference/summary.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | summary method for fastglm fitted objects — summary.fastglm • fastglm 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 100 | 101 | 102 |
    103 | 104 |
    105 |
    106 | 111 | 112 |
    113 | 114 |

    summary method for fastglm fitted objects

    115 | 116 |
    117 | 118 |
    # S3 method for fastglm
    119 | summary(object, dispersion = NULL, ...)
    120 | 121 |

    Arguments

    122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 132 | 133 | 134 | 135 | 136 | 137 |
    object

    fastglm fitted object

    dispersion

    the dispersion parameter for the family used. 131 | Either a single numerical value or NULL (the default), when it is inferred from object.

    ...

    not used

    138 | 139 |

    Value

    140 | 141 |

    a summary.fastglm object

    142 | 143 | 144 |

    Examples

    145 |
    146 | x <- matrix(rnorm(10000 * 10), ncol = 10) 147 | y <- 1 * (0.25 * x[,1] - 0.25 * x[,3] > rnorm(10000)) 148 | 149 | fit <- fastglm(x, y, family = binomial()) 150 | 151 | summary(fit)
    #> 152 | #> Call: 153 | #> fastglm.default(x = x, y = y, family = binomial()) 154 | #> 155 | #> Deviance Residuals: 156 | #> Min 1Q Median 3Q Max 157 | #> -2.0129 -1.1218 -0.6275 1.1112 2.0918 158 | #> 159 | #> Coefficients: 160 | #> Estimate Std. Error z value Pr(>|z|) 161 | #> X1 0.424298 0.021406 19.821 <2e-16 *** 162 | #> X2 -0.029000 0.020471 -1.417 0.157 163 | #> X3 -0.364584 0.021316 -17.103 <2e-16 *** 164 | #> X4 0.007105 0.020891 0.340 0.734 165 | #> X5 -0.003902 0.020628 -0.189 0.850 166 | #> X6 0.026920 0.020796 1.295 0.195 167 | #> X7 -0.009822 0.020591 -0.477 0.633 168 | #> X8 0.006501 0.020869 0.311 0.755 169 | #> X9 0.000051 0.020562 0.002 0.998 170 | #> X10 0.022796 0.020788 1.097 0.273 171 | #> --- 172 | #> Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 173 | #> 174 | #> (Dispersion parameter for binomial family taken to be 1) 175 | #> 176 | #> Null deviance: 13863 on 10000 degrees of freedom 177 | #> Residual deviance: 13150 on 9990 degrees of freedom 178 | #> AIC: 13170 179 | #> 180 | #> Number of Fisher Scoring iterations: 4 181 | #>
    182 | 183 |
    184 |
    185 | 196 |
    197 | 198 |
    199 | 202 | 203 |
    204 |

    Site built with pkgdown.

    205 |
    206 | 207 |
    208 |
    209 | 210 | 211 | 212 | 213 | 214 | 215 | -------------------------------------------------------------------------------- /fastglm.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /inst/include/bigmemory.h: -------------------------------------------------------------------------------- 1 | #ifndef _bigFastlm_BIGMEMORY_H 2 | #define _bigFastlm_BIGMEMORY_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace Rcpp; 14 | using namespace RcppEigen; 15 | 16 | 17 | RcppExport SEXP crossprod_big(SEXP); 18 | 19 | RcppExport SEXP colsums_big(SEXP); 20 | 21 | RcppExport SEXP colmax_big(SEXP); 22 | 23 | RcppExport SEXP colmin_big(SEXP); 24 | 25 | RcppExport SEXP prod_vec_big(SEXP, SEXP); 26 | 27 | RcppExport SEXP prod_vec_right(SEXP, SEXP); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /inst/include/glm_base.h: -------------------------------------------------------------------------------- 1 | #ifndef GLM_BASE_H 2 | #define GLM_BASE_H 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | using namespace std; 9 | 10 | using Eigen::Map; 11 | 12 | 13 | template 14 | class GlmBase 15 | { 16 | protected: 17 | 18 | const int nvars; // dimension of beta 19 | const int nobs; // number of rows 20 | 21 | VecTypeX beta; // parameters to be optimized 22 | VecTypeX beta_prev; // auxiliary parameters 23 | 24 | VecTypeX eta; 25 | VecTypeX var_mu; 26 | VecTypeX mu_eta; 27 | VecTypeX mu; 28 | VecTypeX z; 29 | VecTypeX w; 30 | MatTypeX vcov; 31 | VecTypeX se; 32 | double dev, devold, devnull; 33 | 34 | int maxit; // max iterations 35 | double tol; // tolerance for convergence 36 | bool conv; 37 | 38 | 39 | virtual bool converged() 40 | { 41 | if (std::abs(dev - devold)/(0.1 + std::abs(dev)) < tol) 42 | { 43 | return true; 44 | } else 45 | { 46 | return false; 47 | } 48 | } 49 | 50 | 51 | virtual void update_eta() 52 | { 53 | 54 | } 55 | 56 | virtual void update_var_mu() 57 | { 58 | 59 | } 60 | 61 | virtual void update_mu_eta() 62 | { 63 | 64 | } 65 | 66 | virtual void update_mu() 67 | { 68 | 69 | } 70 | 71 | virtual void update_z() 72 | { 73 | 74 | } 75 | 76 | virtual void update_w() 77 | { 78 | 79 | } 80 | 81 | virtual void step_halve() 82 | { 83 | 84 | } 85 | 86 | virtual void run_step_halving(int &iterr) 87 | { 88 | 89 | } 90 | 91 | virtual void update_dev_resids() 92 | { 93 | 94 | } 95 | 96 | virtual void update_dev_resids_dont_update_old() 97 | { 98 | 99 | } 100 | 101 | virtual void solve_wls(int iter) 102 | { 103 | 104 | } 105 | 106 | virtual void save_se() 107 | { 108 | 109 | } 110 | 111 | 112 | public: 113 | GlmBase(int n_, int p_, 114 | double tol_ = 1e-6, 115 | int maxit_ = 100) : 116 | nvars(p_), nobs(n_), 117 | beta(p_), 118 | beta_prev(p_), // allocate space but do not set values 119 | eta(n_), 120 | var_mu(n_), 121 | mu_eta(n_), 122 | mu(n_), 123 | z(n_), 124 | w(n_), 125 | vcov(p_, p_), 126 | se(p_), 127 | maxit(maxit_), 128 | tol(tol_) 129 | {} 130 | 131 | virtual ~GlmBase() {} 132 | 133 | virtual void init_parms(const Map & start_, 134 | const Map & mu_, 135 | const Map & eta_) {} 136 | 137 | void update_beta() 138 | { 139 | //VecTypeX newbeta(nvars); 140 | next_beta(beta); 141 | //beta.swap(newbeta); 142 | } 143 | 144 | int solve(int maxit) 145 | { 146 | int i; 147 | 148 | conv = false; 149 | 150 | for(i = 0; i < maxit; ++i) 151 | { 152 | 153 | update_var_mu(); 154 | 155 | update_mu_eta(); 156 | 157 | update_z(); 158 | 159 | update_w(); 160 | 161 | solve_wls(i); 162 | 163 | update_eta(); 164 | 165 | update_mu(); 166 | 167 | update_dev_resids(); 168 | 169 | run_step_halving(i); 170 | 171 | if (std::isinf(dev) && i == 0) 172 | { 173 | stop("cannot find valid starting values: please specify some"); 174 | } 175 | 176 | if(converged()) 177 | { 178 | conv = true; 179 | break; 180 | } 181 | 182 | 183 | } 184 | 185 | save_se(); 186 | 187 | return std::min(i + 1, maxit); 188 | } 189 | 190 | virtual VecTypeX get_beta() { return beta; } 191 | virtual VecTypeX get_eta() { return eta; } 192 | virtual VecTypeX get_se() { return se; } 193 | virtual VecTypeX get_mu() { return mu; } 194 | virtual VecTypeX get_weights() { return w; } 195 | virtual VecTypeX get_w() { return w.array().square(); } 196 | virtual double get_dev() { return dev; } 197 | virtual int get_rank() { return nvars; } 198 | virtual MatTypeX get_vcov() { return vcov; } 199 | virtual bool get_converged() { return conv; } 200 | 201 | }; 202 | 203 | 204 | 205 | #endif // GLM_BASE_H 206 | -------------------------------------------------------------------------------- /man/deviance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glm_methods.R 3 | \name{deviance.fastglm} 4 | \alias{deviance.fastglm} 5 | \title{deviance method for fastglm fitted objects} 6 | \usage{ 7 | \method{deviance}{fastglm}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{fastglm fitted object} 11 | 12 | \item{...}{not used} 13 | } 14 | \value{ 15 | The value of the deviance extracted from the object 16 | } 17 | \description{ 18 | deviance method for fastglm fitted objects 19 | } 20 | -------------------------------------------------------------------------------- /man/extract-methods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bigmemory_methods.R 3 | \docType{methods} 4 | \name{\%*\%,big.matrix,vector-method} 5 | \alias{\%*\%,big.matrix,vector-method} 6 | \alias{\%*\%,vector,big.matrix-method} 7 | \title{big.matrix prod} 8 | \usage{ 9 | \S4method{\%*\%}{big.matrix,vector}(x, y) 10 | 11 | \S4method{\%*\%}{vector,big.matrix}(x, y) 12 | } 13 | \arguments{ 14 | \item{x}{big.matrix} 15 | 16 | \item{y}{numeric vector} 17 | } 18 | \description{ 19 | big.matrix prod 20 | 21 | big.matrix prod 22 | } 23 | -------------------------------------------------------------------------------- /man/family.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glm_methods.R 3 | \name{family.fastglm} 4 | \alias{family.fastglm} 5 | \title{family method for fastglm fitted objects} 6 | \usage{ 7 | \method{family}{fastglm}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{fastglm fitted object} 11 | 12 | \item{...}{not used} 13 | } 14 | \value{ 15 | returns the family of the fitted object 16 | } 17 | \description{ 18 | family method for fastglm fitted objects 19 | } 20 | -------------------------------------------------------------------------------- /man/fastglm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_glm.R 3 | \name{fastglm} 4 | \alias{fastglm} 5 | \alias{fastglm.default} 6 | \title{fast generalized linear model fitting} 7 | \usage{ 8 | fastglm(x, ...) 9 | 10 | \method{fastglm}{default}( 11 | x, 12 | y, 13 | family = gaussian(), 14 | weights = NULL, 15 | offset = NULL, 16 | start = NULL, 17 | etastart = NULL, 18 | mustart = NULL, 19 | method = 0L, 20 | tol = 1e-08, 21 | maxit = 100L, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{x}{input model matrix. Must be a matrix object} 27 | 28 | \item{...}{not used} 29 | 30 | \item{y}{numeric response vector of length nobs.} 31 | 32 | \item{family}{a description of the error distribution and link function to be used in the model. 33 | For \code{fastglm} this can be a character string naming a family function, a family function or the 34 | result of a call to a family function. For \code{fastglmPure} only the third option is supported. 35 | (See \code{\link[stats]{family}} for details of family functions.)} 36 | 37 | \item{weights}{an optional vector of 'prior weights' to be used in the fitting process. Should be a numeric vector.} 38 | 39 | \item{offset}{this can be used to specify an a priori known component to be included in the linear predictor during fitting. 40 | This should be a numeric vector of length equal to the number of cases} 41 | 42 | \item{start}{starting values for the parameters in the linear predictor.} 43 | 44 | \item{etastart}{starting values for the linear predictor.} 45 | 46 | \item{mustart}{values for the vector of means.} 47 | 48 | \item{method}{an integer scalar with value 0 for the column-pivoted QR decomposition, 1 for the unpivoted QR decomposition, 49 | 2 for the LLT Cholesky, or 3 for the LDLT Cholesky} 50 | 51 | \item{tol}{threshold tolerance for convergence. Should be a positive real number} 52 | 53 | \item{maxit}{maximum number of IRLS iterations. Should be an integer} 54 | } 55 | \value{ 56 | A list with the elements 57 | \item{coefficients}{a vector of coefficients} 58 | \item{se}{a vector of the standard errors of the coefficient estimates} 59 | \item{rank}{a scalar denoting the computed rank of the model matrix} 60 | \item{df.residual}{a scalar denoting the degrees of freedom in the model} 61 | \item{residuals}{the vector of residuals} 62 | \item{s}{a numeric scalar - the root mean square for residuals} 63 | \item{fitted.values}{the vector of fitted values} 64 | } 65 | \description{ 66 | fast generalized linear model fitting 67 | 68 | bigLm default 69 | } 70 | \examples{ 71 | 72 | x <- matrix(rnorm(10000 * 100), ncol = 100) 73 | y <- 1 * (0.25 * x[,1] - 0.25 * x[,3] > rnorm(10000)) 74 | 75 | system.time(gl1 <- glm.fit(x, y, family = binomial())) 76 | 77 | system.time(gf1 <- fastglm(x, y, family = binomial())) 78 | 79 | system.time(gf2 <- fastglm(x, y, family = binomial(), method = 1)) 80 | 81 | system.time(gf3 <- fastglm(x, y, family = binomial(), method = 2)) 82 | 83 | system.time(gf4 <- fastglm(x, y, family = binomial(), method = 3)) 84 | 85 | max(abs(coef(gl1) - gf1$coef)) 86 | max(abs(coef(gl1) - gf2$coef)) 87 | max(abs(coef(gl1) - gf3$coef)) 88 | max(abs(coef(gl1) - gf4$coef)) 89 | 90 | 91 | \dontrun{ 92 | nrows <- 50000 93 | ncols <- 50 94 | bkFile <- "bigmat2.bk" 95 | descFile <- "bigmatk2.desc" 96 | bigmat <- filebacked.big.matrix(nrow=nrows, ncol=ncols, type="double", 97 | backingfile=bkFile, backingpath=".", 98 | descriptorfile=descFile, 99 | dimnames=c(NULL,NULL)) 100 | for (i in 1:ncols) bigmat[,i] = rnorm(nrows)*i 101 | y <- 1*(rnorm(nrows) + bigmat[,1] > 0) 102 | 103 | system.time(gfb1 <- fastglm(bigmat, y, family = binomial(), method = 3)) 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /man/fastglmPure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_glm.R 3 | \name{fastglmPure} 4 | \alias{fastglmPure} 5 | \title{fast generalized linear model fitting} 6 | \usage{ 7 | fastglmPure( 8 | x, 9 | y, 10 | family = gaussian(), 11 | weights = rep(1, NROW(y)), 12 | offset = rep(0, NROW(y)), 13 | start = NULL, 14 | etastart = NULL, 15 | mustart = NULL, 16 | method = 0L, 17 | tol = 1e-07, 18 | maxit = 100L 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{input model matrix. Must be a matrix object} 23 | 24 | \item{y}{numeric response vector of length nobs.} 25 | 26 | \item{family}{a description of the error distribution and link function to be used in the model. 27 | For \code{fastglmPure} this can only be the result of a call to a family function. 28 | (See \code{\link[stats]{family}} for details of family functions.)} 29 | 30 | \item{weights}{an optional vector of 'prior weights' to be used in the fitting process. Should be a numeric vector.} 31 | 32 | \item{offset}{this can be used to specify an a priori known component to be included in the linear predictor during fitting. 33 | This should be a numeric vector of length equal to the number of cases} 34 | 35 | \item{start}{starting values for the parameters in the linear predictor.} 36 | 37 | \item{etastart}{starting values for the linear predictor.} 38 | 39 | \item{mustart}{values for the vector of means.} 40 | 41 | \item{method}{an integer scalar with value 0 for the column-pivoted QR decomposition, 1 for the unpivoted QR decomposition, 42 | 2 for the LLT Cholesky, 3 for the LDLT Cholesky, 4 for the full pivoted QR decomposition, 5 for the Bidiagonal Divide and 43 | Conquer SVD} 44 | 45 | \item{tol}{threshold tolerance for convergence. Should be a positive real number} 46 | 47 | \item{maxit}{maximum number of IRLS iterations. Should be an integer} 48 | } 49 | \value{ 50 | A list with the elements 51 | \item{coefficients}{a vector of coefficients} 52 | \item{se}{a vector of the standard errors of the coefficient estimates} 53 | \item{rank}{a scalar denoting the computed rank of the model matrix} 54 | \item{df.residual}{a scalar denoting the degrees of freedom in the model} 55 | \item{residuals}{the vector of residuals} 56 | \item{s}{a numeric scalar - the root mean square for residuals} 57 | \item{fitted.values}{the vector of fitted values} 58 | } 59 | \description{ 60 | fast generalized linear model fitting 61 | } 62 | \examples{ 63 | 64 | set.seed(1) 65 | x <- matrix(rnorm(1000 * 25), ncol = 25) 66 | eta <- 0.1 + 0.25 * x[,1] - 0.25 * x[,3] + 0.75 * x[,5] -0.35 * x[,6] #0.25 * x[,1] - 0.25 * x[,3] 67 | y <- 1 * (eta > rnorm(1000)) 68 | 69 | yp <- rpois(1000, eta ^ 2) 70 | yg <- rgamma(1000, exp(eta) * 1.75, 1.75) 71 | 72 | # binomial 73 | system.time(gl1 <- glm.fit(x, y, family = binomial())) 74 | 75 | system.time(gf1 <- fastglmPure(x, y, family = binomial(), tol = 1e-8)) 76 | 77 | system.time(gf2 <- fastglmPure(x, y, family = binomial(), method = 1, tol = 1e-8)) 78 | 79 | system.time(gf3 <- fastglmPure(x, y, family = binomial(), method = 2, tol = 1e-8)) 80 | 81 | system.time(gf4 <- fastglmPure(x, y, family = binomial(), method = 3, tol = 1e-8)) 82 | 83 | max(abs(coef(gl1) - gf1$coef)) 84 | max(abs(coef(gl1) - gf2$coef)) 85 | max(abs(coef(gl1) - gf3$coef)) 86 | max(abs(coef(gl1) - gf4$coef)) 87 | 88 | # poisson 89 | system.time(gl1 <- glm.fit(x, yp, family = poisson(link = "log"))) 90 | 91 | system.time(gf1 <- fastglmPure(x, yp, family = poisson(link = "log"), tol = 1e-8)) 92 | 93 | system.time(gf2 <- fastglmPure(x, yp, family = poisson(link = "log"), method = 1, tol = 1e-8)) 94 | 95 | system.time(gf3 <- fastglmPure(x, yp, family = poisson(link = "log"), method = 2, tol = 1e-8)) 96 | 97 | system.time(gf4 <- fastglmPure(x, yp, family = poisson(link = "log"), method = 3, tol = 1e-8)) 98 | 99 | max(abs(coef(gl1) - gf1$coef)) 100 | max(abs(coef(gl1) - gf2$coef)) 101 | max(abs(coef(gl1) - gf3$coef)) 102 | max(abs(coef(gl1) - gf4$coef)) 103 | 104 | # gamma 105 | system.time(gl1 <- glm.fit(x, yg, family = Gamma(link = "log"))) 106 | 107 | system.time(gf1 <- fastglmPure(x, yg, family = Gamma(link = "log"), tol = 1e-8)) 108 | 109 | system.time(gf2 <- fastglmPure(x, yg, family = Gamma(link = "log"), method = 1, tol = 1e-8)) 110 | 111 | system.time(gf3 <- fastglmPure(x, yg, family = Gamma(link = "log"), method = 2, tol = 1e-8)) 112 | 113 | system.time(gf4 <- fastglmPure(x, yg, family = Gamma(link = "log"), method = 3, tol = 1e-8)) 114 | 115 | max(abs(coef(gl1) - gf1$coef)) 116 | max(abs(coef(gl1) - gf2$coef)) 117 | max(abs(coef(gl1) - gf3$coef)) 118 | max(abs(coef(gl1) - gf4$coef)) 119 | 120 | } 121 | -------------------------------------------------------------------------------- /man/logLik.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glm_methods.R 3 | \name{logLik.fastglm} 4 | \alias{logLik.fastglm} 5 | \title{logLik method for fastglm fitted objects} 6 | \usage{ 7 | \method{logLik}{fastglm}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{fastglm fitted object} 11 | 12 | \item{...}{not used} 13 | } 14 | \value{ 15 | Returns an object of class \code{logLik} 16 | } 17 | \description{ 18 | logLik method for fastglm fitted objects 19 | } 20 | -------------------------------------------------------------------------------- /man/predict.fastglm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glm_methods.R 3 | \name{predict.fastglm} 4 | \alias{predict.fastglm} 5 | \title{Obtains predictions and optionally estimates standard errors of those predictions from a fitted generalized linear model object.} 6 | \usage{ 7 | \method{predict}{fastglm}( 8 | object, 9 | newdata = NULL, 10 | type = c("link", "response"), 11 | se.fit = FALSE, 12 | dispersion = NULL, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{object}{a fitted object of class inheriting from "\code{fastglm}".} 18 | 19 | \item{newdata}{a matrix to be used for prediction} 20 | 21 | \item{type}{the type of prediction required. The default is on the scale of the linear predictors; 22 | the alternative "\code{response}" is on the scale of the response variable. Thus for a default binomial 23 | model the default predictions are of log-odds (probabilities on logit scale) and \code{type = "response"} 24 | gives the predicted probabilities. The "\code{terms}" option returns a matrix giving the fitted values of each 25 | term in the model formula on the linear predictor scale. 26 | 27 | The value of this argument can be abbreviated.} 28 | 29 | \item{se.fit}{logical switch indicating if standard errors are required.} 30 | 31 | \item{dispersion}{the dispersion of the GLM fit to be assumed in computing the standard errors. 32 | If omitted, that returned by \code{summary} applied to the object is used.} 33 | 34 | \item{...}{further arguments passed to or from other methods.} 35 | } 36 | \description{ 37 | Obtains predictions and optionally estimates standard errors of those predictions from a fitted generalized linear model object. 38 | } 39 | -------------------------------------------------------------------------------- /man/print.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glm_methods.R 3 | \name{print.fastglm} 4 | \alias{print.fastglm} 5 | \title{print method for fastglm objects} 6 | \usage{ 7 | \method{print}{fastglm}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{object to print} 11 | 12 | \item{...}{not used} 13 | } 14 | \description{ 15 | print method for fastglm objects 16 | } 17 | -------------------------------------------------------------------------------- /man/residuals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glm_methods.R 3 | \name{residuals.fastglm} 4 | \alias{residuals.fastglm} 5 | \title{residuals method for fastglm fitted objects} 6 | \usage{ 7 | \method{residuals}{fastglm}( 8 | object, 9 | type = c("deviance", "pearson", "working", "response", "partial"), 10 | ... 11 | ) 12 | } 13 | \arguments{ 14 | \item{object}{fastglm fitted object} 15 | 16 | \item{type}{type of residual to be returned} 17 | 18 | \item{...}{not used} 19 | } 20 | \value{ 21 | a vector of residuals 22 | } 23 | \description{ 24 | residuals method for fastglm fitted objects 25 | } 26 | -------------------------------------------------------------------------------- /man/summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glm_methods.R 3 | \name{summary.fastglm} 4 | \alias{summary.fastglm} 5 | \title{summary method for fastglm fitted objects} 6 | \usage{ 7 | \method{summary}{fastglm}(object, dispersion = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{object}{fastglm fitted object} 11 | 12 | \item{dispersion}{the dispersion parameter for the family used. 13 | Either a single numerical value or \code{NULL} (the default), when it is inferred from \code{object}.} 14 | 15 | \item{...}{not used} 16 | } 17 | \value{ 18 | a summary.fastglm object 19 | } 20 | \description{ 21 | summary method for fastglm fitted objects 22 | } 23 | \examples{ 24 | 25 | x <- matrix(rnorm(10000 * 10), ncol = 10) 26 | y <- 1 * (0.25 * x[,1] - 0.25 * x[,3] > rnorm(10000)) 27 | 28 | fit <- fastglm(x, y, family = binomial()) 29 | 30 | summary(fit) 31 | 32 | 33 | } 34 | -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{marschner2011glm2, 3 | title={glm2: fitting generalized linear models with convergence problems}, 4 | author={Marschner, Ian C}, 5 | journal={The R journal}, 6 | volume={3}, 7 | number={2}, 8 | year={2011} 9 | } 10 | 11 | @manual{R, 12 | title = {R: A Language and Environment for Statistical Computing}, 13 | author = {{R Core Team}}, 14 | organization = {R Foundation for Statistical Computing}, 15 | address = {Vienna, Austria}, 16 | year = {2018}, 17 | url = {https://www.R-project.org/} 18 | } 19 | 20 | 21 | @MISC{eigenweb, 22 | author = {Ga\"{e}l Guennebaud and Beno\^{i}t Jacob and others}, 23 | title = {Eigen v3}, 24 | howpublished = {http://eigen.tuxfamily.org}, 25 | year = {2010} 26 | } 27 | 28 | @article{bates2013fast, 29 | title = {Fast and Elegant Numerical Linear Algebra Using the 30 | {RcppEigen} Package}, 31 | author = {Douglas Bates and Dirk Eddelbuettel}, 32 | journal = {Journal of Statistical Software}, 33 | year = {2013}, 34 | volume = {52}, 35 | number = {5}, 36 | pages = {1--24}, 37 | url = {http://www.jstatsoft.org/v52/i05/}, 38 | } -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'fastglm: An R package for fast and stable generalized linear model fitting' 3 | tags: 4 | - statistics 5 | - regression 6 | - generalized linear models 7 | - statistical computing 8 | - R language 9 | authors: 10 | - name: Jared Davis Huling 11 | orcid: 0000-0003-0670-4845 12 | affiliation: "1" # (Multiple affiliations must be quoted) 13 | affiliations: 14 | - name: Department of Statistics, The Ohio State University 15 | index: 1 16 | date: 01 March 2019 17 | bibliography: paper.bib 18 | --- 19 | 20 | # Summary 21 | 22 | The `fastglm` R package contains an efficient and stable implementation of iteratively 23 | reweighted least squares (IRLS) for general purpose fitting of generalized linear models (GLMs). The 24 | implementation of IRLS utilizes a step-halving approach as was used in the `glm2` package 25 | [@marschner2011glm2] for mitigating divergence issues due boundary violations and other issues that 26 | often arise when using non-canonical link functions in GLMs. The `fastglm` package combines this with 27 | parameter initialization in a manner that makes it more robust to divergence problems than 28 | the base R function `glm()` and the `glm2()` function of the `glm2` package. 29 | The package is written in C++ using the efficient Eigen numerical linear algebra library [@eigenweb] 30 | and is delivered as a package for the R language and environment for statistical computing [@R]. 31 | 32 | The `fastglm` package is designed to be used concurrently with the `family` class of objects, enabling 33 | its use for a wide variety of GLMs. Further, the API and returned objects make the `fastglm` package 34 | easily-usable by practitioners already familiar with fitting GLMs in R. The `fastglm` package includes 35 | thorough documentation and usage vignettes that allow for users to quickly and thoroughly understand 36 | how to utilize the package for their own data analyses. 37 | 38 | The `fastglm` package utilizes an IRLS algorithm where at each iteration the weighted least squares 39 | subproblem is solved via one of six different linear solvers which range along a spectrum between 40 | computational speed and stability: 1) the column-pivoted QR decomposition (stable, moderately fast) 41 | 2) the unpivoted QR decomposition (moderately stable, fast) 3) LDLT Cholesky decomposition (reasonably 42 | stable, very fast) 4) LLT Cholesky decomposition (somewhat stable, fastest) 5) the full pivoted QR 43 | decomposition (very stable, not so fast) 6) the Bidiagonal divide and conquer SVD (the stablest, 44 | the slowest). 45 | 46 | 47 | 48 | 49 | 50 | # References -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = -DNDEBUG -I"../inst/include/" 2 | #PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 4 | 5 | CXX_STD = CXX11 -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // colMax_dense 15 | Eigen::MatrixXd colMax_dense(const Eigen::Map& A); 16 | RcppExport SEXP _fastglm_colMax_dense(SEXP ASEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::RNGScope rcpp_rngScope_gen; 20 | Rcpp::traits::input_parameter< const Eigen::Map& >::type A(ASEXP); 21 | rcpp_result_gen = Rcpp::wrap(colMax_dense(A)); 22 | return rcpp_result_gen; 23 | END_RCPP 24 | } 25 | // colMin_dense 26 | Eigen::MatrixXd colMin_dense(const Eigen::Map& A); 27 | RcppExport SEXP _fastglm_colMin_dense(SEXP ASEXP) { 28 | BEGIN_RCPP 29 | Rcpp::RObject rcpp_result_gen; 30 | Rcpp::RNGScope rcpp_rngScope_gen; 31 | Rcpp::traits::input_parameter< const Eigen::Map& >::type A(ASEXP); 32 | rcpp_result_gen = Rcpp::wrap(colMin_dense(A)); 33 | return rcpp_result_gen; 34 | END_RCPP 35 | } 36 | // fit_glm 37 | List fit_glm(Rcpp::NumericMatrix x, Rcpp::NumericVector y, Rcpp::NumericVector weights, Rcpp::NumericVector offset, Rcpp::NumericVector start, Rcpp::NumericVector mu, Rcpp::NumericVector eta, Function var, Function mu_eta, Function linkinv, Function dev_resids, Function valideta, Function validmu, int type, double tol, int maxit); 38 | RcppExport SEXP _fastglm_fit_glm(SEXP xSEXP, SEXP ySEXP, SEXP weightsSEXP, SEXP offsetSEXP, SEXP startSEXP, SEXP muSEXP, SEXP etaSEXP, SEXP varSEXP, SEXP mu_etaSEXP, SEXP linkinvSEXP, SEXP dev_residsSEXP, SEXP validetaSEXP, SEXP validmuSEXP, SEXP typeSEXP, SEXP tolSEXP, SEXP maxitSEXP) { 39 | BEGIN_RCPP 40 | Rcpp::RObject rcpp_result_gen; 41 | Rcpp::RNGScope rcpp_rngScope_gen; 42 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type x(xSEXP); 43 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type y(ySEXP); 44 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type weights(weightsSEXP); 45 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type offset(offsetSEXP); 46 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type start(startSEXP); 47 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type mu(muSEXP); 48 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type eta(etaSEXP); 49 | Rcpp::traits::input_parameter< Function >::type var(varSEXP); 50 | Rcpp::traits::input_parameter< Function >::type mu_eta(mu_etaSEXP); 51 | Rcpp::traits::input_parameter< Function >::type linkinv(linkinvSEXP); 52 | Rcpp::traits::input_parameter< Function >::type dev_resids(dev_residsSEXP); 53 | Rcpp::traits::input_parameter< Function >::type valideta(validetaSEXP); 54 | Rcpp::traits::input_parameter< Function >::type validmu(validmuSEXP); 55 | Rcpp::traits::input_parameter< int >::type type(typeSEXP); 56 | Rcpp::traits::input_parameter< double >::type tol(tolSEXP); 57 | Rcpp::traits::input_parameter< int >::type maxit(maxitSEXP); 58 | rcpp_result_gen = Rcpp::wrap(fit_glm(x, y, weights, offset, start, mu, eta, var, mu_eta, linkinv, dev_resids, valideta, validmu, type, tol, maxit)); 59 | return rcpp_result_gen; 60 | END_RCPP 61 | } 62 | // fit_big_glm 63 | List fit_big_glm(SEXP x, Rcpp::NumericVector y, Rcpp::NumericVector weights, Rcpp::NumericVector offset, Rcpp::NumericVector start, Rcpp::NumericVector mu, Rcpp::NumericVector eta, Function var, Function mu_eta, Function linkinv, Function dev_resids, Function valideta, Function validmu, int type, double tol, int maxit); 64 | RcppExport SEXP _fastglm_fit_big_glm(SEXP xSEXP, SEXP ySEXP, SEXP weightsSEXP, SEXP offsetSEXP, SEXP startSEXP, SEXP muSEXP, SEXP etaSEXP, SEXP varSEXP, SEXP mu_etaSEXP, SEXP linkinvSEXP, SEXP dev_residsSEXP, SEXP validetaSEXP, SEXP validmuSEXP, SEXP typeSEXP, SEXP tolSEXP, SEXP maxitSEXP) { 65 | BEGIN_RCPP 66 | Rcpp::RObject rcpp_result_gen; 67 | Rcpp::RNGScope rcpp_rngScope_gen; 68 | Rcpp::traits::input_parameter< SEXP >::type x(xSEXP); 69 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type y(ySEXP); 70 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type weights(weightsSEXP); 71 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type offset(offsetSEXP); 72 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type start(startSEXP); 73 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type mu(muSEXP); 74 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type eta(etaSEXP); 75 | Rcpp::traits::input_parameter< Function >::type var(varSEXP); 76 | Rcpp::traits::input_parameter< Function >::type mu_eta(mu_etaSEXP); 77 | Rcpp::traits::input_parameter< Function >::type linkinv(linkinvSEXP); 78 | Rcpp::traits::input_parameter< Function >::type dev_resids(dev_residsSEXP); 79 | Rcpp::traits::input_parameter< Function >::type valideta(validetaSEXP); 80 | Rcpp::traits::input_parameter< Function >::type validmu(validmuSEXP); 81 | Rcpp::traits::input_parameter< int >::type type(typeSEXP); 82 | Rcpp::traits::input_parameter< double >::type tol(tolSEXP); 83 | Rcpp::traits::input_parameter< int >::type maxit(maxitSEXP); 84 | rcpp_result_gen = Rcpp::wrap(fit_big_glm(x, y, weights, offset, start, mu, eta, var, mu_eta, linkinv, dev_resids, valideta, validmu, type, tol, maxit)); 85 | return rcpp_result_gen; 86 | END_RCPP 87 | } 88 | 89 | RcppExport SEXP colmax_big(SEXP); 90 | RcppExport SEXP colmin_big(SEXP); 91 | RcppExport SEXP colsums_big(SEXP); 92 | RcppExport SEXP crossprod_big(SEXP); 93 | RcppExport SEXP prod_vec_big(SEXP, SEXP); 94 | RcppExport SEXP prod_vec_big_right(SEXP, SEXP); 95 | 96 | static const R_CallMethodDef CallEntries[] = { 97 | {"_fastglm_colMax_dense", (DL_FUNC) &_fastglm_colMax_dense, 1}, 98 | {"_fastglm_colMin_dense", (DL_FUNC) &_fastglm_colMin_dense, 1}, 99 | {"_fastglm_fit_glm", (DL_FUNC) &_fastglm_fit_glm, 16}, 100 | {"_fastglm_fit_big_glm", (DL_FUNC) &_fastglm_fit_big_glm, 16}, 101 | {"colmax_big", (DL_FUNC) &colmax_big, 1}, 102 | {"colmin_big", (DL_FUNC) &colmin_big, 1}, 103 | {"colsums_big", (DL_FUNC) &colsums_big, 1}, 104 | {"crossprod_big", (DL_FUNC) &crossprod_big, 1}, 105 | {"prod_vec_big", (DL_FUNC) &prod_vec_big, 2}, 106 | {"prod_vec_big_right", (DL_FUNC) &prod_vec_big_right, 2}, 107 | {NULL, NULL, 0} 108 | }; 109 | 110 | RcppExport void R_init_fastglm(DllInfo *dll) { 111 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 112 | R_useDynamicSymbols(dll, FALSE); 113 | } 114 | -------------------------------------------------------------------------------- /src/bigmemory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "math.h" 3 | #include "../inst/include/bigmemory.h" 4 | #include 5 | #include 6 | 7 | 8 | using Eigen::MatrixXf; 9 | using Eigen::VectorXf; 10 | using Eigen::MatrixXd; 11 | using Eigen::MatrixXi; 12 | using Eigen::VectorXd; 13 | using Eigen::VectorXi; 14 | using namespace Rcpp; 15 | using namespace RcppEigen; 16 | 17 | template 18 | Eigen::Matrix BigEigenColSums(const Eigen::Matrix& bigMat) 19 | { 20 | return bigMat.colwise().sum(); 21 | } 22 | 23 | template 24 | Eigen::Matrix BigEigenCrossprod(const Eigen::Matrix& bigMat) 25 | { 26 | typedef Eigen::Matrix MatrixXTT; 27 | int p = bigMat.cols(); 28 | 29 | MatrixXTT retmat(p, p); 30 | retmat.setZero(); 31 | 32 | return retmat.template selfadjointView().rankUpdate( bigMat.adjoint() ); 33 | } 34 | 35 | // Logic for BigColSums. 36 | template 37 | NumericVector BigColSums(XPtr pMat, MatrixAccessor mat) { 38 | 39 | // Create the vector we'll store the column sums in. 40 | NumericVector colSums(pMat->ncol()); 41 | for (size_t i=0; i < pMat->ncol(); ++i) 42 | colSums[i] = std::accumulate(mat[i], mat[i]+pMat->nrow(), 0.0); 43 | return colSums; 44 | } 45 | 46 | RcppExport SEXP crossprod_big(SEXP X_) 47 | { 48 | using namespace Rcpp; 49 | using namespace RcppEigen; 50 | try { 51 | using Eigen::Map; 52 | using Eigen::MatrixXd; 53 | using Eigen::VectorXd; 54 | 55 | typedef Eigen::Matrix MatrixXchar; 56 | typedef Eigen::Matrix MatrixXshort; 57 | 58 | XPtr bMPtr(X_); 59 | 60 | 61 | unsigned int type = bMPtr->matrix_type(); 62 | 63 | 64 | if (type == 1) 65 | { 66 | Map bM = Map((char *)bMPtr->matrix(), bMPtr->nrow(), bMPtr->ncol() ); 67 | int p = bM.cols(); 68 | MatrixXchar crossprod = MatrixXchar(p, p).setZero().selfadjointView().rankUpdate( bM.adjoint() ); 69 | return wrap(crossprod); 70 | } else if (type == 2) 71 | { 72 | Map bM = Map((short *)bMPtr->matrix(), bMPtr->nrow(), bMPtr->ncol() ); 73 | int p = bM.cols(); 74 | MatrixXshort crossprod = MatrixXshort(p, p).setZero().selfadjointView().rankUpdate( bM.adjoint() ); 75 | return wrap(crossprod); 76 | } else if (type == 4) 77 | { 78 | Map bM = Map((int *)bMPtr->matrix(), bMPtr->nrow(), bMPtr->ncol() ); 79 | int p = bM.cols(); 80 | MatrixXi crossprod = MatrixXi(p, p).setZero().selfadjointView().rankUpdate( bM.adjoint() ); 81 | return wrap(crossprod); 82 | } else if (type == 6) 83 | { 84 | Map bM = Map((float *)bMPtr->matrix(), bMPtr->nrow(), bMPtr->ncol() ); 85 | int p = bM.cols(); 86 | MatrixXf crossprod = MatrixXf(p, p).setZero().selfadjointView().rankUpdate( bM.adjoint() ); 87 | return wrap(crossprod); 88 | } else if (type == 8) 89 | { 90 | Map bM = Map((double *)bMPtr->matrix(), bMPtr->nrow(), bMPtr->ncol() ); 91 | int p = bM.cols(); 92 | MatrixXd crossprod = MatrixXd(p, p).setZero().selfadjointView().rankUpdate( bM.adjoint() ); 93 | return wrap(crossprod); 94 | } else { 95 | // We should never get here, but it resolves compiler warnings. 96 | throw Rcpp::exception("Undefined type for provided big.matrix"); 97 | } 98 | 99 | } catch (std::exception &ex) { 100 | forward_exception_to_r(ex); 101 | } catch (...) { 102 | ::Rf_error("C++ exception (unknown reason)"); 103 | } 104 | return R_NilValue; //-Wall 105 | } 106 | 107 | RcppExport SEXP colsums_big(SEXP X_) 108 | { 109 | BEGIN_RCPP 110 | using Eigen::Map; 111 | using Eigen::MatrixXd; 112 | using Eigen::VectorXd; 113 | 114 | typedef Eigen::Matrix MatrixXchar; 115 | typedef Eigen::Matrix MatrixXshort; 116 | typedef Eigen::Matrix Vectorchar; 117 | typedef Eigen::Matrix Vectorshort; 118 | 119 | XPtr xpMat(X_); 120 | 121 | 122 | unsigned int type = xpMat->matrix_type(); 123 | // The data stored in the big.matrix can either be represent by 1, 2, 124 | // 4, 6, or 8 bytes. See the "type" argument in `?big.matrix`. 125 | if (type == 1) 126 | { 127 | Map bM = Map((char *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 128 | Vectorchar colSums = bM.colwise().sum(); 129 | return wrap(colSums); 130 | } else if (type == 2) 131 | { 132 | Map bM = Map((short *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 133 | Vectorshort colSums = bM.colwise().sum(); 134 | return wrap(colSums); 135 | } else if (type == 4) 136 | { 137 | Map bM = Map((int *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 138 | VectorXi colSums = bM.colwise().sum(); 139 | return wrap(colSums); 140 | } else if (type == 6) 141 | { 142 | Map bM = Map((float *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 143 | VectorXf colSums = bM.colwise().sum(); 144 | return wrap(colSums); 145 | } else if (type == 8) 146 | { 147 | Map bM = Map((double *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 148 | VectorXd colSums = bM.colwise().sum(); 149 | return wrap(colSums); 150 | } else { 151 | // We should never get here, but it resolves compiler warnings. 152 | throw Rcpp::exception("Undefined type for provided big.matrix"); 153 | } 154 | 155 | END_RCPP 156 | } 157 | 158 | 159 | 160 | RcppExport SEXP colmax_big(SEXP X_) 161 | { 162 | BEGIN_RCPP 163 | using Eigen::Map; 164 | using Eigen::MatrixXd; 165 | using Eigen::VectorXd; 166 | 167 | typedef Eigen::Matrix MatrixXchar; 168 | typedef Eigen::Matrix MatrixXshort; 169 | typedef Eigen::Matrix Vectorchar; 170 | typedef Eigen::Matrix Vectorshort; 171 | 172 | XPtr xpMat(X_); 173 | 174 | 175 | unsigned int type = xpMat->matrix_type(); 176 | // The data stored in the big.matrix can either be represent by 1, 2, 177 | // 4, 6, or 8 bytes. See the "type" argument in `?big.matrix`. 178 | if (type == 1) 179 | { 180 | Map bM = Map((char *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 181 | Vectorchar colSums = bM.colwise().maxCoeff(); 182 | return wrap(colSums); 183 | } else if (type == 2) 184 | { 185 | Map bM = Map((short *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 186 | Vectorshort colSums = bM.colwise().maxCoeff(); 187 | return wrap(colSums); 188 | } else if (type == 4) 189 | { 190 | Map bM = Map((int *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 191 | VectorXi colSums = bM.colwise().maxCoeff(); 192 | return wrap(colSums); 193 | } else if (type == 6) 194 | { 195 | Map bM = Map((float *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 196 | VectorXf colSums = bM.colwise().maxCoeff(); 197 | return wrap(colSums); 198 | } else if (type == 8) 199 | { 200 | Map bM = Map((double *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 201 | VectorXd colSums = bM.colwise().maxCoeff(); 202 | return wrap(colSums); 203 | } else { 204 | // We should never get here, but it resolves compiler warnings. 205 | throw Rcpp::exception("Undefined type for provided big.matrix"); 206 | } 207 | 208 | END_RCPP 209 | } 210 | 211 | 212 | RcppExport SEXP colmin_big(SEXP X_) 213 | { 214 | BEGIN_RCPP 215 | using Eigen::Map; 216 | using Eigen::MatrixXd; 217 | using Eigen::VectorXd; 218 | 219 | typedef Eigen::Matrix MatrixXchar; 220 | typedef Eigen::Matrix MatrixXshort; 221 | typedef Eigen::Matrix Vectorchar; 222 | typedef Eigen::Matrix Vectorshort; 223 | 224 | XPtr xpMat(X_); 225 | 226 | 227 | unsigned int type = xpMat->matrix_type(); 228 | // The data stored in the big.matrix can either be represent by 1, 2, 229 | // 4, 6, or 8 bytes. See the "type" argument in `?big.matrix`. 230 | if (type == 1) 231 | { 232 | Map bM = Map((char *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 233 | Vectorchar colSums = bM.colwise().minCoeff(); 234 | return wrap(colSums); 235 | } else if (type == 2) 236 | { 237 | Map bM = Map((short *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 238 | Vectorshort colSums = bM.colwise().minCoeff(); 239 | return wrap(colSums); 240 | } else if (type == 4) 241 | { 242 | Map bM = Map((int *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 243 | VectorXi colSums = bM.colwise().minCoeff(); 244 | return wrap(colSums); 245 | } else if (type == 6) 246 | { 247 | Map bM = Map((float *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 248 | VectorXf colSums = bM.colwise().minCoeff(); 249 | return wrap(colSums); 250 | } else if (type == 8) 251 | { 252 | Map bM = Map((double *)xpMat->matrix(), xpMat->nrow(), xpMat->ncol() ); 253 | VectorXd colSums = bM.colwise().minCoeff(); 254 | return wrap(colSums); 255 | } else { 256 | // We should never get here, but it resolves compiler warnings. 257 | throw Rcpp::exception("Undefined type for provided big.matrix"); 258 | } 259 | 260 | END_RCPP 261 | } 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | RcppExport SEXP prod_vec_big(SEXP A_, SEXP B_) 271 | { 272 | BEGIN_RCPP 273 | using Eigen::Map; 274 | using Eigen::MatrixXd; 275 | using Eigen::VectorXd; 276 | using Eigen::VectorXi; 277 | using Eigen::VectorXf; 278 | 279 | typedef Eigen::Matrix MatrixXchar; 280 | typedef Eigen::Matrix MatrixXshort; 281 | typedef Eigen::Matrix Vectorchar; 282 | typedef Eigen::Matrix Vectorshort; 283 | typedef Map MapVecd; 284 | typedef Map MapVeci; 285 | typedef Map MapVecf; 286 | 287 | XPtr ApMat(A_); 288 | 289 | 290 | unsigned int Atype = ApMat->matrix_type(); 291 | 292 | 293 | // The data stored in the big.matrix can either be represent by 1, 2, 294 | // 4, 6, or 8 bytes. See the "type" argument in `?big.matrix`. 295 | if (Atype == 1) 296 | { 297 | throw Rcpp::exception("Unavailable type for provided big.matrix"); 298 | } else if (Atype == 2) 299 | { 300 | throw Rcpp::exception("Unavailable type for provided big.matrix"); 301 | } else if (Atype == 4) 302 | { 303 | Map bA = Map((int *)ApMat->matrix(), ApMat->nrow(), ApMat->ncol() ); 304 | const MapVeci B(as(B_)); 305 | 306 | if (ApMat->ncol() != B.size()) 307 | { 308 | throw Rcpp::exception("Dimensions imcompatible"); 309 | } 310 | 311 | 312 | VectorXi prod = bA * B; 313 | return wrap(prod); 314 | } else if (Atype == 6) 315 | { 316 | throw Rcpp::exception("Unavailable type for provided big.matrix"); 317 | } else if (Atype == 8) 318 | { 319 | Map bA = Map((double *)ApMat->matrix(), ApMat->nrow(), ApMat->ncol() ); 320 | const MapVecd B(as(B_)); 321 | 322 | if (ApMat->ncol() != B.size()) 323 | { 324 | throw Rcpp::exception("Dimensions imcompatible"); 325 | } 326 | 327 | VectorXd prod = bA * B; 328 | return wrap(prod); 329 | } else { 330 | // We should never get here, but it resolves compiler warnings. 331 | throw Rcpp::exception("Undefined type for provided big.matrix"); 332 | } 333 | 334 | return NULL; 335 | 336 | END_RCPP 337 | } 338 | 339 | 340 | RcppExport SEXP prod_vec_big_right(SEXP A_, SEXP B_) 341 | { 342 | BEGIN_RCPP 343 | using Eigen::Map; 344 | using Eigen::MatrixXd; 345 | using Eigen::VectorXd; 346 | using Eigen::VectorXi; 347 | using Eigen::VectorXf; 348 | 349 | typedef Eigen::Matrix MatrixXchar; 350 | typedef Eigen::Matrix MatrixXshort; 351 | typedef Eigen::Matrix Vectorchar; 352 | typedef Eigen::Matrix Vectorshort; 353 | typedef Map MapVecd; 354 | typedef Map MapVeci; 355 | typedef Map MapVecf; 356 | 357 | XPtr ApMat(B_); 358 | 359 | 360 | unsigned int Atype = ApMat->matrix_type(); 361 | 362 | 363 | // The data stored in the big.matrix can either be represent by 1, 2, 364 | // 4, 6, or 8 bytes. See the "type" argument in `?big.matrix`. 365 | if (Atype == 1) 366 | { 367 | throw Rcpp::exception("Unavailable type for provided big.matrix"); 368 | } else if (Atype == 2) 369 | { 370 | throw Rcpp::exception("Unavailable type for provided big.matrix"); 371 | } else if (Atype == 4) 372 | { 373 | Map bA = Map((int *)ApMat->matrix(), ApMat->nrow(), ApMat->ncol() ); 374 | const MapVeci A(as(A_)); 375 | 376 | if (ApMat->nrow() != A.size()) 377 | { 378 | throw Rcpp::exception("Dimensions imcompatible"); 379 | } 380 | 381 | 382 | VectorXi prod = A * bA; 383 | return wrap(prod); 384 | } else if (Atype == 6) 385 | { 386 | throw Rcpp::exception("Unavailable type for provided big.matrix"); 387 | } else if (Atype == 8) 388 | { 389 | Map bA = Map((double *)ApMat->matrix(), ApMat->nrow(), ApMat->ncol() ); 390 | const MapVecd A(as(A_)); 391 | 392 | if (ApMat->nrow() != A.size()) 393 | { 394 | throw Rcpp::exception("Dimensions imcompatible"); 395 | } 396 | 397 | VectorXd prod = A * bA; 398 | return wrap(prod); 399 | } else { 400 | // We should never get here, but it resolves compiler warnings. 401 | throw Rcpp::exception("Undefined type for provided big.matrix"); 402 | } 403 | 404 | return NULL; 405 | 406 | END_RCPP 407 | } 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | -------------------------------------------------------------------------------- /src/colmin_colmax.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | //[[Rcpp::export]] 5 | Eigen::MatrixXd colMax_dense(const Eigen::Map & A){ 6 | Eigen::VectorXd colM = A.colwise().maxCoeff(); 7 | return colM; 8 | } 9 | 10 | //[[Rcpp::export]] 11 | Eigen::MatrixXd colMin_dense(const Eigen::Map & A){ 12 | Eigen::VectorXd colM = A.colwise().minCoeff(); 13 | return colM; 14 | } 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/fit_glm_dense.cpp: -------------------------------------------------------------------------------- 1 | #define EIGEN_DONT_PARALLELIZE 2 | 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include "../inst/include/glm.h" 9 | #include 10 | 11 | using namespace Rcpp; 12 | 13 | using Eigen::VectorXd; 14 | using Eigen::MatrixXd; 15 | using Eigen::Map; 16 | 17 | typedef MatrixXd::Index Index; 18 | 19 | 20 | List fastglm(Rcpp::NumericMatrix Xs, 21 | Rcpp::NumericVector ys, 22 | Rcpp::NumericVector weightss, 23 | Rcpp::NumericVector offsets, 24 | Rcpp::NumericVector starts, 25 | Rcpp::NumericVector mus, 26 | Rcpp::NumericVector etas, 27 | Function var, 28 | Function mu_eta, 29 | Function linkinv, 30 | Function dev_resids, 31 | Function valideta, 32 | Function validmu, 33 | int type, 34 | double tol, 35 | int maxit) 36 | { 37 | const Map X(as >(Xs)); 38 | const Map y(as >(ys)); 39 | const Map weights(as >(weightss)); 40 | const Map offset(as >(offsets)); 41 | const Map beta_init(as >(starts)); 42 | const Map mu_init(as >(mus)); 43 | const Map eta_init(as >(etas)); 44 | Index n = X.rows(); 45 | if ((Index)y.size() != n) throw invalid_argument("size mismatch"); 46 | 47 | // instantiate fitting class 48 | GlmBase *glm_solver = NULL; 49 | 50 | bool is_big_matrix = false; 51 | 52 | glm_solver = new glm(X, y, weights, offset, 53 | var, mu_eta, linkinv, dev_resids, 54 | valideta, validmu, tol, maxit, type, 55 | is_big_matrix); 56 | 57 | // initialize parameters 58 | glm_solver->init_parms(beta_init, mu_init, eta_init); 59 | 60 | 61 | // maximize likelihood 62 | int iters = glm_solver->solve(maxit); 63 | 64 | VectorXd beta = glm_solver->get_beta(); 65 | VectorXd se = glm_solver->get_se(); 66 | VectorXd mu = glm_solver->get_mu(); 67 | VectorXd eta = glm_solver->get_eta(); 68 | VectorXd wts = glm_solver->get_w(); 69 | VectorXd pweights = glm_solver->get_weights(); 70 | 71 | double dev = glm_solver->get_dev(); 72 | int rank = glm_solver->get_rank(); 73 | bool converged = glm_solver->get_converged(); 74 | 75 | int df = X.rows() - rank; 76 | 77 | delete glm_solver; 78 | 79 | return List::create(_["coefficients"] = beta, 80 | _["se"] = se, 81 | _["fitted.values"] = mu, 82 | _["linear.predictors"] = eta, 83 | _["deviance"] = dev, 84 | _["weights"] = wts, 85 | _["prior.weights"] = pweights, 86 | _["rank"] = rank, 87 | _["df.residual"] = df, 88 | _["iter"] = iters, 89 | _["converged"] = converged); 90 | } 91 | 92 | 93 | // [[Rcpp::export]] 94 | List fit_glm(Rcpp::NumericMatrix x, Rcpp::NumericVector y, Rcpp::NumericVector weights, Rcpp::NumericVector offset, 95 | Rcpp::NumericVector start, Rcpp::NumericVector mu, Rcpp::NumericVector eta, 96 | Function var, Function mu_eta, Function linkinv, Function dev_resids, 97 | Function valideta, Function validmu, 98 | int type, double tol, int maxit) 99 | { 100 | return fastglm(x, y, weights, offset, start, mu, eta, var, mu_eta, linkinv, dev_resids, valideta, validmu, type, tol, maxit); 101 | } 102 | 103 | 104 | 105 | 106 | 107 | List bigfastglm(XPtr Xs, 108 | Rcpp::NumericVector ys, 109 | Rcpp::NumericVector weightss, 110 | Rcpp::NumericVector offsets, 111 | Rcpp::NumericVector starts, 112 | Rcpp::NumericVector mus, 113 | Rcpp::NumericVector etas, 114 | Function var, 115 | Function mu_eta, 116 | Function linkinv, 117 | Function dev_resids, 118 | Function valideta, 119 | Function validmu, 120 | int type, 121 | double tol, 122 | int maxit) 123 | { 124 | //const Map X(as >(Xs)); 125 | //XPtr bMPtr(Xs); 126 | 127 | unsigned int typedata = Xs->matrix_type(); 128 | 129 | if (typedata != 8) 130 | { 131 | throw Rcpp::exception("type for provided big.matrix not available"); 132 | } 133 | const Map X = Map((double *)Xs->matrix(), Xs->nrow(), Xs->ncol() ); 134 | const Map y(as >(ys)); 135 | 136 | 137 | 138 | const Map weights(as >(weightss)); 139 | const Map offset(as >(offsets)); 140 | const Map beta_init(as >(starts)); 141 | const Map mu_init(as >(mus)); 142 | const Map eta_init(as >(etas)); 143 | Index n = X.rows(); 144 | if ((Index)y.size() != n) throw invalid_argument("size mismatch"); 145 | 146 | 147 | if (type != 2 && type != 3) 148 | { 149 | throw invalid_argument("type must be either 2 or 3 for big.matrix objects"); 150 | } 151 | 152 | // instantiate fitting class 153 | GlmBase *glm_solver = NULL; 154 | 155 | bool is_big_matrix = true; 156 | 157 | glm_solver = new glm(X, y, weights, offset, 158 | var, mu_eta, linkinv, dev_resids, 159 | valideta, validmu, tol, maxit, type, 160 | is_big_matrix); 161 | 162 | // initialize parameters 163 | glm_solver->init_parms(beta_init, mu_init, eta_init); 164 | 165 | 166 | // maximize likelihood 167 | int iters = glm_solver->solve(maxit); 168 | 169 | VectorXd beta = glm_solver->get_beta(); 170 | VectorXd se = glm_solver->get_se(); 171 | VectorXd mu = glm_solver->get_mu(); 172 | VectorXd eta = glm_solver->get_eta(); 173 | VectorXd wts = glm_solver->get_w(); 174 | VectorXd pweights = glm_solver->get_weights(); 175 | 176 | double dev = glm_solver->get_dev(); 177 | int rank = glm_solver->get_rank(); 178 | bool converged = glm_solver->get_converged(); 179 | 180 | int df = X.rows() - rank; 181 | 182 | delete glm_solver; 183 | 184 | return List::create(_["coefficients"] = beta, 185 | _["se"] = se, 186 | _["fitted.values"] = mu, 187 | _["linear.predictors"] = eta, 188 | _["deviance"] = dev, 189 | _["weights"] = wts, 190 | _["prior.weights"] = pweights, 191 | _["rank"] = rank, 192 | _["df.residual"] = df, 193 | _["iter"] = iters, 194 | _["converged"] = converged); 195 | } 196 | 197 | 198 | // [[Rcpp::export]] 199 | List fit_big_glm(SEXP x, Rcpp::NumericVector y, Rcpp::NumericVector weights, Rcpp::NumericVector offset, 200 | Rcpp::NumericVector start, Rcpp::NumericVector mu, Rcpp::NumericVector eta, 201 | Function var, Function mu_eta, Function linkinv, Function dev_resids, 202 | Function valideta, Function validmu, 203 | int type, double tol, int maxit) 204 | { 205 | XPtr xpMat(x); 206 | 207 | return bigfastglm(xpMat, y, weights, offset, start, mu, eta, var, mu_eta, linkinv, dev_resids, valideta, validmu, type, tol, maxit); 208 | } 209 | 210 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/gen_data-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaredhuling/fastglm/9a04daa4a99761fee4fc87ecdb100a530f96b161/vignettes/gen_data-1.png -------------------------------------------------------------------------------- /vignettes/quick-usage-guide-to-the-fastglm-package.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Quick Usage Guide to the 'fastglm' Package" 3 | author: "Jared Huling" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Quick Usage Guide} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | The `fastglm` package is intended to be a **fast** *and* **stable** alternative to the `glm()` and `glm2()` functions for fitting generalized lienar models. The The `fastglm` package is compatible with `R`'s `family` objects (see `?family`). The package can be installed via 20 | 21 | ```{r, eval = FALSE, echo=TRUE} 22 | devtools::install_github("jaredhuling/fastglm") 23 | ``` 24 | 25 | and loaded via: 26 | 27 | ```{r, echo=TRUE} 28 | library(fastglm) 29 | ``` 30 | 31 | 32 | ## Example 33 | 34 | Currently, the `fastglm` package does not allow for formula-based data input and is restricted to matrices. We use the following example to demonstrate the usage of `fastglm`: 35 | 36 | ```{r, echo = TRUE} 37 | data(esoph) 38 | x <- model.matrix(cbind(ncases, ncontrols) ~ agegp + unclass(tobgp) 39 | + unclass(alcgp), data = esoph) 40 | y <- cbind(esoph$ncases, esoph$ncontrols) 41 | 42 | gfit1 <- fastglm(x = x, y = y, family = binomial(link = "cloglog")) 43 | 44 | summary(gfit1) 45 | ``` 46 | 47 | 48 | ## Computational stability 49 | 50 | The `fastglm` package does not compromise computational stability for speed. In fact, for many situations where `glm()` and even `glm2()` do not converge, `fastglm()` does converge. 51 | 52 | As an example, consider the following data scenario, where the response distribution is (mildly) misspecified, but the link function is quite badly misspecified. In such scenarios, the standard IRLS algorithm tends to have convergence issues. The `glm2()` package was designed to handle such cases, however, it still can have convergence issues. The `fastglm()` package uses a similar step-halving technique as `glm2()`, but it starts at better initialized values and thus tends to have better convergence properties in practice. 53 | 54 | ```{r, fig.show='hold'} 55 | set.seed(1) 56 | x <- matrix(rnorm(10000 * 100), ncol = 100) 57 | y <- (exp(0.25 * x[,1] - 0.25 * x[,3] + 0.5 * x[,4] - 0.5 * x[,5] + rnorm(10000)) ) + 0.1 58 | 59 | 60 | system.time(gfit1 <- fastglm(cbind(1, x), y, family = Gamma(link = "sqrt"))) 61 | 62 | system.time(gfit2 <- glm(y~x, family = Gamma(link = "sqrt")) ) 63 | 64 | system.time(gfit3 <- glm2::glm2(y~x, family = Gamma(link = "sqrt")) ) 65 | 66 | ## Note that fastglm() returns estimates with the 67 | ## largest likelihood 68 | logLik(gfit1) 69 | logLik(gfit2) 70 | logLik(gfit3) 71 | 72 | coef(gfit1)[1:5] 73 | coef(gfit2)[1:5] 74 | coef(gfit3)[1:5] 75 | 76 | ## check convergence of fastglm 77 | gfit1$converged 78 | ## number of IRLS iterations 79 | gfit1$iter 80 | 81 | ## now check convergence for glm() 82 | gfit2$converged 83 | gfit2$iter 84 | 85 | ## check convergence for glm2() 86 | gfit3$converged 87 | gfit3$iter 88 | 89 | 90 | ## increasing number of IRLS iterations for glm() does not help that much 91 | system.time(gfit2 <- glm(y~x, family = Gamma(link = "sqrt"), control = list(maxit = 100)) ) 92 | 93 | gfit2$converged 94 | gfit2$iter 95 | 96 | logLik(gfit1) 97 | logLik(gfit2) 98 | 99 | ``` 100 | 101 | 102 | --------------------------------------------------------------------------------