├── .Rbuildignore ├── .github └── workflows │ ├── check-standard.yaml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── .gitignore ├── .version.json ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── G.R ├── auc.R ├── calc_null_dev.R ├── cv-grpreg.R ├── cv-grpsurv.R ├── data.R ├── expand_spline.R ├── gBridge.R ├── gen_nonlinear_data.R ├── grpreg-package.R ├── grpreg.R ├── grpsurv.R ├── logLik.R ├── loss.R ├── multi.R ├── newS.R ├── newXG.R ├── newY.R ├── orthogonalize.R ├── plot-cv.R ├── plot-grpsurv-func.R ├── plot.R ├── plot_spline.R ├── predict-cv.R ├── predict-surv.R ├── predict.R ├── predict_spline.R ├── residuals.R ├── se-grpsurv.R ├── select.R ├── setupLambda.R ├── setupLambdaCox.R ├── standardize.R └── summary-cv-grpreg.R ├── README.md ├── data ├── Birthwt.RData ├── Lung.RData └── birthwt.grpreg.RData ├── grpreg.Rproj ├── inst ├── CITATION └── tinytest │ ├── additive-models.r │ ├── agreement.r │ ├── auc.r │ ├── binomial.r │ ├── coerce.r │ ├── cv-grpsurv.r │ ├── cv.r │ ├── extra-features.r │ ├── gBridge.r │ ├── gaussian.r │ ├── grpsurv.r │ ├── multitask.r │ ├── noiseless.r │ ├── poisson.r │ ├── select.r │ ├── standardization-ortho.r │ ├── surv-torture.r │ └── torture.r ├── man ├── AUC.cv.grpsurv.Rd ├── Birthwt.Rd ├── Lung.Rd ├── birthwt.grpreg.Rd ├── cv.grpreg.Rd ├── expand_spline.Rd ├── gBridge.Rd ├── gen_nonlinear_data.Rd ├── grpreg-package.Rd ├── grpreg.Rd ├── grpsurv.Rd ├── logLik.grpreg.Rd ├── plot.cv.grpreg.Rd ├── plot.grpreg.Rd ├── plot.grpsurv.func.Rd ├── plot_spline.Rd ├── predict.grpreg.Rd ├── predict.grpsurv.Rd ├── residuals.grpreg.Rd ├── select.Rd └── summary.cv.grpreg.Rd ├── pkgdown ├── _pkgdown.yml └── extra.css ├── src ├── gdfit_cox.c ├── gdfit_gaussian.c ├── gdfit_glm.c ├── grpreg_init.c ├── lcdfit_cox.c ├── lcdfit_gaussian.c ├── lcdfit_glm.c ├── maxprod.c └── standardize.c ├── tests └── test.R └── vignettes ├── articles ├── _commands.tex ├── adaptive-rescaling.rmd ├── additive-models.rmd ├── models.rmd └── penalties.rmd ├── grpreg.rmd └── vignette.css /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^pkgdown$ 2 | ^docs$ 3 | ^index.Rmd$ 4 | ^\.github$ 5 | ^\.version\.json$ 6 | ^.*\.Rproj$ 7 | ^\.Rproj\.user$ 8 | -------------------------------------------------------------------------------- /.github/workflows/check-standard.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main, master] 4 | pull_request: 5 | branches: [main, master] 6 | 7 | name: R-CMD-check 8 | 9 | jobs: 10 | R-CMD-check: 11 | runs-on: ${{ matrix.config.os }} 12 | 13 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 14 | 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | config: 19 | - {os: windows-latest, r: 'release'} 20 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 21 | - {os: ubuntu-latest, r: 'release'} 22 | 23 | env: 24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 25 | R_KEEP_PKG_SOURCE: yes 26 | R_FORCE_TEST: true 27 | 28 | steps: 29 | - uses: actions/checkout@v4 30 | 31 | - uses: r-lib/actions/setup-pandoc@v2 32 | 33 | - uses: r-lib/actions/setup-r@v2 34 | with: 35 | r-version: ${{ matrix.config.r }} 36 | http-user-agent: ${{ matrix.config.http-user-agent }} 37 | use-public-rspm: true 38 | 39 | - uses: r-lib/actions/setup-r-dependencies@v2 40 | with: 41 | extra-packages: | 42 | any::rcmdcheck 43 | any::glmnet 44 | any::ncvreg 45 | any::survival 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | args: '"--no-manual"' 50 | 51 | 
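# Note: R_FORCE_TEST (set in the env block above) is not a variable that R CMD check itself reads; it is assumed here to be a repo-specific switch that this package's tinytest files consult in order to run tests that would otherwise be skipped (e.g., slow or cross-package agreement tests).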
-------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main, master] 4 | pull_request: 5 | branches: [main, master] 6 | release: 7 | types: [published] 8 | workflow_dispatch: 9 | 10 | name: pkgdown 11 | 12 | jobs: 13 | pkgdown: 14 | runs-on: ubuntu-latest 15 | # Only restrict concurrency for non-PR jobs 16 | concurrency: 17 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | permissions: 21 | contents: write 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: | 34 | any::pkgdown 35 | local::. 36 | needs: website 37 | 38 | - name: Build site 39 | run: options(yaml.eval.expr = TRUE); pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 40 | shell: Rscript {0} 41 | 42 | - name: Deploy to GitHub pages 🚀 43 | if: github.event_name != 'pull_request' 44 | uses: JamesIves/github-pages-deploy-action@v4.5.0 45 | with: 46 | clean: false 47 | branch: gh-pages 48 | folder: docs 49 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main, master] 4 | pull_request: 5 | branches: [main, master] 6 | 7 | name: test-coverage 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | R_FORCE_TEST: true 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: | 28 | any::covr 29 | any::glmnet 30 | any::ncvreg 31 | any::survival 32 | any::tinytest 33 | any::xml2 34 | needs: coverage 35 | 36 | - name: Test coverage 37 | run: | 38 | cov <- covr::package_coverage( 39 | quiet = FALSE, 40 | clean = FALSE, 41 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 42 | ) 43 | print(cov) 44 | covr::to_cobertura(cov) 45 | shell: Rscript {0} 46 | 47 | - uses: codecov/codecov-action@v4 48 | with: 49 | # Fail on error when not on a PR, or when on a PR and the Codecov token is given 50 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 51 | file: ./cobertura.xml 52 | plugin: noop 53 | disable_search: true 54 | token: ${{ secrets.CODECOV_TOKEN }} 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vignettes/*.html 2 | .Rproj.user 3 | .Rhistory 4 | docs 5 | 6 | *.o 7 | *.so 8 | -------------------------------------------------------------------------------- /.version.json: -------------------------------------------------------------------------------- 1 | { 2 | "schemaVersion": 1, 3 | "label": "GitHub", 4 | "message": "3.5.0.3", 5 | "color": "blue" 6 | }
7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: grpreg 2 | Title: Regularization Paths for Regression Models with Grouped Covariates 3 | Version: 3.5.0.3 4 | Authors@R: c( 5 | person("Patrick", "Breheny", role=c("aut","cre"), email="patrick-breheny@uiowa.edu", comment=c(ORCID="0000-0002-0650-1119")), 6 | person("Yaohui", "Zeng", role="ctb"), 7 | person("Ryan", "Kurth", role="ctb")) 8 | Description: Efficient algorithms for fitting the regularization path of linear 9 | regression, GLM, and Cox regression models with grouped penalties. This 10 | includes group selection methods such as group lasso, group MCP, and 11 | group SCAD as well as bi-level selection methods such as the group 12 | exponential lasso, the composite MCP, and the group bridge. For more 13 | information, see Breheny and Huang (2009) <doi:10.4310/sii.2009.v2.n3.a10>, 14 | Huang, Breheny, and Ma (2012) <doi:10.1214/12-sts392>, Breheny and Huang 15 | (2015) <doi:10.1007/s11222-013-9424-2>, and Breheny (2015) 16 | <doi:10.1111/biom.12300>, or visit the package homepage 17 | <https://pbreheny.github.io/grpreg/>. 18 | License: GPL-3 19 | URL: https://pbreheny.github.io/grpreg/, https://github.com/pbreheny/grpreg 20 | BugReports: https://github.com/pbreheny/grpreg/issues 21 | Depends: R (>= 3.1.0) 22 | Imports: Matrix 23 | Suggests: knitr, rmarkdown, splines, survival, tinytest 24 | VignetteBuilder: knitr 25 | Encoding: UTF-8 26 | Roxygen: list(markdown = TRUE) 27 | RoxygenNote: 7.3.2 28 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AUC,cv.grpsurv) 4 | S3method(coef,cv.grpreg) 5 | S3method(coef,grpreg) 6 | S3method(logLik,grpreg) 7 | S3method(logLik,grpsurv) 8 | S3method(plot,cv.grpreg) 9 | S3method(plot,grpreg) 10 | S3method(plot,grpsurv.func) 11 | S3method(predict,cv.grpreg) 12 | S3method(predict,grpreg) 13 | S3method(predict,grpsurv) 14 | S3method(print,summary.cv.grpreg) 15 | S3method(residuals,grpreg) 16 | S3method(select,grpreg) 17 | S3method(summary,cv.grpreg) 18 | export(AUC) 19 | export(cv.grpreg) 20 | export(cv.grpsurv) 21 | export(expand_spline) 22 | export(gBridge) 23 | export(gen_nonlinear_data) 24 | export(grpreg) 25 | export(grpsurv) 26 | export(plot_spline) 27 | export(select) 28 | import(grDevices) 29 | import(graphics) 30 | import(stats) 31 | import(utils) 32 | importFrom(Matrix,bdiag) 33 | useDynLib(grpreg, .registration = TRUE) 34 | -------------------------------------------------------------------------------- /R/G.R: -------------------------------------------------------------------------------- 1 | setupG <- function(group, m, bilevel) { 2 | gf <- factor(group) 3 | if (any(levels(gf)=='0')) { 4 | g <- as.integer(gf) - 1 5 | lev <- levels(gf)[levels(gf)!='0'] 6 | } else { 7 | g <- as.integer(gf) 8 | lev <- levels(gf) 9 | } 10 | if (is.numeric(group) | is.integer(group)) { 11 | lev <- paste0("G", lev) 12 | } 13 | if (missing(m)) { 14 | m <- rep(NA, length(lev)) 15 | names(m) <- lev 16 | } else { 17 | #if (all.equal(sort(names(m)), sort(group))) 18 | TRY <- try(as.integer(group)==g) 19 | if (inherits(TRY, 'try-error') || any(!TRY)) stop('Attempting to set group.multiplier is ambiguous if group is not a factor', call.=FALSE) 20 | if (length(m) != length(lev)) stop("Length of group.multiplier must equal number of penalized groups", call.=FALSE) 21 | if (storage.mode(m) != "double") storage.mode(m) <- "double" 22 | if
(any(m < 0)) stop('group.multiplier cannot be negative', call.=FALSE) 23 | } 24 | structure(g, levels=lev, m=m) 25 | } 26 | subsetG <- function(g, nz) { 27 | lev <- attr(g, 'levels') 28 | m <- attr(g, 'm') 29 | new <- g[nz] 30 | dropped <- setdiff(g, new) 31 | if (length(dropped)) { 32 | lev <- lev[-dropped] 33 | m <- m[-dropped] 34 | gf <- factor(new) 35 | new <- as.integer(gf) - 1*any(levels(gf)=='0') 36 | } 37 | structure(new, levels=lev, m=m) 38 | } 39 | reorderG <- function(g, m, bilevel) { 40 | og <- g 41 | lev <- attr(g, 'levels') 42 | m <- attr(g, 'm') 43 | if (any(g==0)) { 44 | g <- as.integer(relevel(factor(g), "0"))-1 45 | } 46 | if (any(order(g) != 1:length(g))) { 47 | reorder <- TRUE 48 | gf <- factor(g) 49 | if (any(levels(gf)=="0")) { 50 | gf <- relevel(gf, "0") 51 | g <- as.integer(gf) - 1 52 | } else { 53 | g <- as.integer(gf) 54 | } 55 | ord <- order(g) 56 | ord.inv <- match(1:length(g), ord) 57 | g <- g[ord] 58 | } else { 59 | reorder <- FALSE 60 | ord <- ord.inv <- NULL 61 | } 62 | structure(g, levels=lev, m=m, ord=ord, ord.inv=ord.inv, reorder=reorder) 63 | } 64 | -------------------------------------------------------------------------------- /R/auc.R: -------------------------------------------------------------------------------- 1 | #' Calculates AUC for cv.grpsurv objects 2 | #' 3 | #' Calculates the cross-validated AUC (concordance) from a "cv.grpsurv" object. 4 | #' 5 | #' The area under the curve (AUC), or equivalently, the concordance statistic 6 | #' (C), is calculated according to the procedure described in van Houwelingen 7 | #' and Putter (2011). The function calls `survival::concordancefit()`, except 8 | #' cross-validated linear predictors are used to guard against overfitting. 9 | #' Thus, the values returned by `AUC.cv.grpsurv()` will be lower than those you 10 | #' would obtain with `concordancefit()` if you fit the full (unpenalized) model. 11 | #' 12 | #' @aliases AUC 13 | #' 14 | #' @param obj A `cv.grpsurv` object. You must run `cv.grpsurv()` with the option `returnY=TRUE` in order for `AUC()` to work. 15 | #' @param \dots For S3 method compatibility. 16 | #' 17 | #' @seealso [cv.grpsurv()], [survival::concordancefit()] 18 | #' 19 | #' @references van Houwelingen H, Putter H (2011). *Dynamic Prediction in Clinical Survival Analysis*. CRC Press. 20 | #' 21 | #' @examples 22 | #' \dontshow{set.seed(1)} 23 | #' data(Lung) 24 | #' X <- Lung$X 25 | #' y <- Lung$y 26 | #' group <- Lung$group 27 | #' 28 | #' cvfit <- cv.grpsurv(X, y, group, returnY=TRUE) 29 | #' head(AUC(cvfit)) 30 | #' ll <- log(cvfit$fit$lambda) 31 | #' plot(ll, AUC(cvfit), xlim=rev(range(ll)), lwd=3, type='l', 32 | #' xlab=expression(log(lambda)), ylab='AUC', las=1) 33 | #' @export 34 | 35 | AUC.cv.grpsurv <- function(obj, ...) { 36 | if (!("Y" %in% names(obj))) stop("Must run cv.grpsurv with 'returnY=TRUE' in order to calculate AUC", call.=FALSE) 37 | if (!requireNamespace("survival", quietly = TRUE)) { 38 | stop("The 'survival' package is needed for AUC() to work. Please install it.", call. = FALSE) 39 | } 40 | if (utils::packageVersion("survival") < "3.2.10") stop("AUC.cv.grpsurv requires version 3.2.10 of 'survival' package or higher", call.=FALSE) 41 | S <- survival::Surv(obj$fit$time, obj$fit$fail) 42 | apply(obj$Y, 2, concord, y = S) 43 | } 44 | 45 | #' @rdname AUC.cv.grpsurv 46 | #' @export 47 | 48 | AUC <- function(obj, ...)
UseMethod("AUC") 49 | 50 | concord <- function(x, y) { 51 | survival::concordancefit(y, -x)$concordance 52 | } 53 | -------------------------------------------------------------------------------- /R/calc_null_dev.R: -------------------------------------------------------------------------------- 1 | calc_null_dev <- function(X, y, group, family) { 2 | form <- if (any(group==0)) formula(y~X[, group==0]) else formula(y~1) 3 | fit <- glm(form, family=family) 4 | mean(deviance_grpreg(y, predict(fit, type="response"), family)) 5 | } 6 | -------------------------------------------------------------------------------- /R/cv-grpsurv.R: -------------------------------------------------------------------------------- 1 | #' @rdname cv.grpreg 2 | #' @export 3 | 4 | cv.grpsurv <- function(X, y, group=1:ncol(X), ..., nfolds=10, seed, fold, se=c('quick', 'bootstrap'), returnY=FALSE, trace=FALSE) { 5 | se <- match.arg(se) 6 | 7 | # Complete data fit 8 | fit.args <- list(...) 9 | fit.args$X <- X 10 | fit.args$y <- y 11 | if (!inherits(X, "expandedMatrix")) fit.args$group <- group 12 | fit.args$returnX <- TRUE 13 | fit <- do.call("grpsurv", fit.args) 14 | 15 | # Get standardized X, y 16 | X <- fit$XG$X 17 | y <- cbind(fit$time, fit$fail) 18 | returnX <- list(...)$returnX 19 | if (is.null(returnX) || !returnX) fit$X <- NULL 20 | 21 | # Set up folds 22 | n <- nrow(X) 23 | if (!missing(seed)) { 24 | original_seed <- .GlobalEnv$.Random.seed 25 | on.exit(.GlobalEnv$.Random.seed <- original_seed) 26 | set.seed(seed) 27 | } 28 | if (missing(fold)) { 29 | ind1 <- which(fit$fail==1) 30 | ind0 <- which(fit$fail==0) 31 | n1 <- length(ind1) 32 | n0 <- length(ind0) 33 | fold1 <- 1:n1 %% nfolds 34 | fold0 <- (n1 + 1:n0) %% nfolds 35 | fold1[fold1==0] <- nfolds 36 | fold0[fold0==0] <- nfolds 37 | fold <- integer(n) 38 | fold[fit$fail==1] <- sample(fold1) 39 | fold[fit$fail==0] <- sample(fold0) 40 | } else { 41 | nfolds <- max(fold) 42 | } 43 | Y <- matrix(NA, nrow=n, ncol=length(fit$lambda)) 44 | 45 | cv.args <- list(...) 
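# The arguments captured from '...' above serve as the template for each fold-wise
# call to grpsurv(); the assignments below then pin every fold to the full-data
# fit's lambda path and its (already standardized) group structure. Illustrative
# sketch (not run): a call like cv.grpsurv(X, y, group, penalty="grMCP", nfolds=5)
# reaches this point with cv.args = list(penalty="grMCP") before lambda, group,
# and group.multiplier are filled in from 'fit'.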
46 | cv.args$lambda <- fit$lambda 47 | cv.args$group <- fit$XG$g 48 | cv.args$group.multiplier <- fit$XG$m 49 | cv.args$warn <- FALSE 50 | 51 | for (i in 1:nfolds) { 52 | if (trace) cat("Starting CV fold #", i, "\n", sep="") 53 | res <- cvf.surv(i, X, y, fold, cv.args) 54 | Y[fold==i, 1:res$nl] <- res$yhat 55 | } 56 | 57 | # Eliminate saturated lambda values, if any 58 | ind <- which(apply(is.finite(Y), 2, all)) 59 | Y <- Y[, ind] 60 | lambda <- fit$lambda[ind] 61 | 62 | # Return 63 | if (se == "quick") { 64 | L <- deviance_grpsurv(y, Y, total=FALSE) 65 | cve <- apply(L, 2, sum)/sum(fit$fail) 66 | cvse <- apply(L, 2, sd)*sqrt(nrow(L))/sum(fit$fail) 67 | } else { 68 | cve <- as.double(deviance_grpsurv(y, Y))/sum(fit$fail) 69 | cvse <- se_grpsurv(y, Y)/sum(fit$fail) 70 | } 71 | min <- which.min(cve) 72 | 73 | val <- list(cve=cve, cvse=cvse, fold=fold, lambda=lambda, fit=fit, min=min, lambda.min=lambda[min], null.dev=cve[1]) 74 | if (returnY) val$Y <- Y 75 | structure(val, class=c("cv.grpsurv", "cv.grpreg")) 76 | } 77 | cvf.surv <- function(i, XX, y, fold, cv.args) { 78 | cv.args$X <- XX[fold!=i, , drop=FALSE] 79 | cv.args$y <- y[fold!=i,] 80 | fit.i <- do.call("grpsurv", cv.args) 81 | 82 | X2 <- XX[fold==i, , drop=FALSE] 83 | y2 <- y[fold==i,] 84 | nl <- length(fit.i$lambda) 85 | yhat <- predict(fit.i, X2) 86 | 87 | list(nl=nl, yhat=yhat) 88 | } 89 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Risk Factors Associated with Low Infant Birth Weight 2 | #' 3 | #' This version of the data set has been deprecated and will not be supported 4 | #' in future versions. Please use \code{\link{Birthwt}} instead. 5 | #' 6 | #' @format This data frame contains the following columns: 7 | #' \itemize{ 8 | #' \item\code{low} Indicator of birth weight less than 2.5kg \item\code{bwt} 9 | #' Birth weight in kilograms \item\code{age1,age2,age3} Orthogonal polynomials 10 | #' of first, second, and third degree representing mother's age in years 11 | #' \item\code{lwt1,lwt2,lwt3} Orthogonal polynomials of first, second, and 12 | #' third degree representing mother's weight in pounds at last menstrual period 13 | #' \item\code{white,black} Indicator functions for mother's race; "other" is 14 | #' reference group \item\code{smoke} Smoking status during pregnancy 15 | #' \item\code{ptl1,ptl2m} Indicator functions for one or for two or more 16 | #' previous premature labors, respectively. No previous premature labors is 17 | #' the reference category. \item\code{ht} History of hypertension 18 | #' \item\code{ui} Presence of uterine irritability \item\code{ftv1,ftv2,ftv3m} 19 | #' Indicator functions for one, for two, or for three or more physician visits 20 | #' during the first trimester, respectively. No visits is the reference 21 | #' category. 22 | #' } 23 | #' @seealso \code{\link{Birthwt}} 24 | "birthwt.grpreg" 25 | 26 | 27 | #' Risk Factors Associated with Low Infant Birth Weight 28 | #' 29 | #' The `Birthwt` data contains 189 observations, 16 predictors, and an 30 | #' outcome, birthweight, available both as a continuous measure and a binary 31 | #' indicator for low birth weight. The data were collected at Baystate Medical 32 | #' Center, Springfield, Mass. during 1986. This data frame is a 33 | #' reparameterization of the `birthwt` data frame from the **MASS** package.
34 | #' 35 | #' @format The \code{Birthwt} object is a list containing four elements (`X`, `bwt`, `low`, and `group`): 36 | #' \describe{ 37 | #' \item{bwt}{Birth weight in kilograms} 38 | #' \item{low}{Indicator of birth weight less than 2.5kg} 39 | #' \item{group}{Vector describing how the columns of X are grouped} 40 | #' \item{X}{A matrix with 189 observations (rows) and 16 predictor variables (columns).} 41 | #' } 42 | #' The matrix `X` contains the following columns: 43 | #' \describe{ 44 | #' \item{age1,age2,age3}{Orthogonal polynomials of first, second, and third degree representing mother's age in years} 45 | #' \item{lwt1,lwt2,lwt3}{Orthogonal polynomials of first, second, and third degree representing mother's weight in pounds at last menstrual period} 46 | #' \item{white,black}{Indicator functions for mother's race; "other" is reference group} 47 | #' \item{smoke}{Smoking status during pregnancy} 48 | #' \item{ptl1,ptl2m}{Indicator functions for one or for two or more previous premature labors, respectively. No previous premature labors is the reference category.} 49 | #' \item{ht}{History of hypertension} 50 | #' \item{ui}{Presence of uterine irritability} 51 | #' \item{ftv1,ftv2,ftv3m}{Indicator functions for one, for two, or for three or more physician visits during the first trimester, respectively. No visits is the reference category.} 52 | #' } 53 | #' 54 | #' @seealso [MASS::birthwt], [grpreg()] 55 | #' 56 | #' @references 57 | #' \itemize{ 58 | #' \item Venables, W. N. and Ripley, B. D. (2002). *Modern Applied Statistics with S.* Fourth edition. Springer. 59 | #' \item Hosmer, D.W. and Lemeshow, S. (1989) *Applied Logistic Regression.* New York: Wiley 60 | #' } 61 | #' 62 | #' @source 63 | #' 64 | #' @examples 65 | #' data(Birthwt) 66 | #' hist(Birthwt$bwt, xlab="Child's birth weight", main="") 67 | #' table(Birthwt$low) 68 | #' ## See examples in ?birthwt (MASS package) 69 | #' ## for more about the data set 70 | #' ## See examples in ?grpreg for use of this data set 71 | #' ## with group penalized regression models 72 | "Birthwt" 73 | 74 | 75 | #' VA lung cancer data set 76 | #' 77 | #' Data from a randomised trial of two treatment regimens for lung cancer. This 78 | #' is a standard survival analysis data set from the classic textbook by 79 | #' Kalbfleisch and Prentice. 80 | #' 81 | #' @format A list of two objects: `y` and `X` 82 | #' \describe{ 83 | #' \item{y}{A two column matrix (`Surv` object) containing the follow-up 84 | #' time (in days) and an indicator variable for whether the patient died 85 | #' while on the study or not.} 86 | #' \item{X}{A matrix with 137 observations (rows) and 9 predictor variables 87 | #' (columns). 
The remainder of this list describes the columns of `X`} 88 | #' \item{trt}{Treatment indicator (1=control group, 2=treatment group)} 89 | #' \item{karno}{Karnofsky performance score (0=bad, 100=good)} 90 | #' \item{diagtime}{Time from diagnosis to randomization (months)} 91 | #' \item{age}{Age (years, at baseline)} 92 | #' \item{prior}{Prior therapy (0=no, 1=yes)} 93 | #' \item{squamous}{Indicator for whether the cancer type is squamous cell 94 | #' carcinoma (0=no, 1=yes)} 95 | #' \item{small}{Indicator for whether the cancer type is small cell lung 96 | #' cancer (0=no, 1=yes)} 97 | #' \item{adeno}{Indicator for whether the cancer type is adenocarcinoma 98 | #' (0=no, 1=yes)} 99 | #' \item{large}{Indicator for whether the cancer type is large cell carcinoma 100 | #' (0=no, 1=yes)} 101 | #' } 102 | #' 103 | #' @seealso [grpsurv()] 104 | #' @references 105 | #' \itemize{ 106 | #' \item Kalbfleisch JD and Prentice RL (1980), *The Statistical Analysis of 107 | #' Failure Time Data*. Wiley, New York. 108 | #' } 109 | #' @source \url{https://cran.r-project.org/package=survival} 110 | "Lung" 111 | -------------------------------------------------------------------------------- /R/expand_spline.R: -------------------------------------------------------------------------------- 1 | #' Expand feature matrix using basis splines 2 | #' 3 | #' Performs a basis expansion for many features at once, returning output that is compatible 4 | #' with the `grpreg()` function. Returns an expanded matrix along with a vector 5 | #' that describes its grouping. 6 | #' 7 | #' `expand_spline()` uses the function [splines::bs()] or [splines::ns()] to generate a basis 8 | #' matrix for each column of `x`. These matrices represent the spline basis for piecewise 9 | #' polynomials with specified degree evaluated separately for each original column of `x`. 10 | #' These matrices are then column-bound to form a single grouped matrix of derived features. A vector 11 | #' that describes the grouping present in the resulting matrix is also generated. The resulting 12 | #' object can be passed to [grpreg()]. 13 | #' 14 | #' This methodology was originally proposed by Ravikumar et al. (2009), who named it SpAM (sparse additive modeling). 15 | #' 16 | #' @param x Features to be expanded (numeric matrix). 17 | #' @param df Degrees of freedom (numeric; default = 3). 18 | #' @param degree Degree of the piecewise polynomial (integer; default = 3 (cubic splines)). 19 | #' @param type Type of spline, either B-spline (`"bs"`) or natural cubic spline (`"ns"`; default). 20 | #' 21 | #' @return 22 | #' An object of class `expandedMatrix` consisting of: 23 | #' * `X`: A matrix of dimension `nrow(x)` by `df*ncol(x)` 24 | #' * `group`: A vector of length `df*ncol(x)` that describes the grouping structure 25 | #' * Additional metadata on the splines, such as knot locations, required in order to evaluate the spline at new feature values (e.g., for prediction) 26 | #' 27 | #' @references 28 | #' * Ravikumar P, Lafferty J, Liu H and Wasserman L (2009). Sparse additive models. *Journal of the Royal Statistical Society Series B*, **71**: 1009-1030.
29 | #' 30 | #' @seealso [plot_spline()] to visualize the resulting nonlinear fits 31 | #' 32 | #' @examples 33 | #' \dontshow{set.seed(1)} 34 | #' Data <- gen_nonlinear_data(n=1000) 35 | #' X <- expand_spline(Data$X) 36 | #' fit <- grpreg(X, Data$y) 37 | #' plot_spline(fit, "V02", lambda = 0.03) 38 | #' @export 39 | 40 | expand_spline <- function(x, df = 3, degree = 3, type = c("ns", "bs")) { 41 | type <- match.arg(type) 42 | if(type == "ns" && degree != 3) { 43 | warning("Degree has been set to 3 for natural splines") 44 | } 45 | n <- nrow(x) 46 | p <- ncol(x) 47 | finalx <- matrix(NA, n, (p*df)) 48 | knots <- rep(list(rep(NA, (df-degree))), p) 49 | boundary <- rep(list(rep(NA, 2)), p) 50 | 51 | if(type == "bs"){ 52 | for(i in 0:(p-1)){ 53 | bs <- splines::bs(x[,i+1], df = df, degree = degree) 54 | finalx[,(df*i+1):(df*i+df)] <- bs 55 | boundary[[i+1]] <- attr(bs, "Boundary.knots") 56 | knots[[i+1]] <- attr(bs, "knots") 57 | } 58 | } 59 | else if (type == "ns") { 60 | for (i in 0:(p-1)) { 61 | ns <- splines::ns(x[,i+1], df = df) 62 | finalx[,(df*i+1):(df*i+df)] <- ns 63 | boundary[[i+1]] <- attr(ns, "Boundary.knots") 64 | knots[[i+1]] <- attr(ns, "knots") 65 | } 66 | } 67 | else { 68 | stop(paste(type, "is not a valid type"), call. = FALSE) 69 | } 70 | 71 | if (length(colnames(x)) == p) { 72 | groups <- rep(colnames(x), each = df) 73 | colnames(finalx) <- paste(groups, 1:df, sep='_') 74 | } 75 | else { 76 | groups <- rep(paste0("V", 1:p), each = df) 77 | colnames(finalx) <- paste(groups, 1:df, sep="_") 78 | } 79 | 80 | return(structure(list(X = finalx, 81 | group = groups, 82 | knots = knots, 83 | boundary = boundary, 84 | degree = degree, 85 | originalx = x, 86 | type = type), class='expandedMatrix')) 87 | } 88 | -------------------------------------------------------------------------------- /R/gBridge.R: -------------------------------------------------------------------------------- 1 | #' Fit a group bridge regression path 2 | #' 3 | #' Fit regularization paths for linear, logistic, and Poisson group bridge-penalized 4 | #' regression models over a grid of values for the regularization parameter 5 | #' lambda. 6 | #' 7 | #' This method fits the group bridge method of Huang et al. (2009). Unlike the 8 | #' penalties in \code{grpreg}, the group bridge is not differentiable at zero; 9 | #' because of this, a number of changes must be made to the algorithm, which is 10 | #' why it has its own function. Most notably, the method is unable to start at 11 | #' \code{lambda.max}; it must start at \code{lambda.min} and proceed in the 12 | #' opposite direction. 13 | #' 14 | #' In other respects, the usage and behavior of the function is similar to the 15 | #' rest of the \code{grpreg} package. 16 | #' 17 | #' @param X The design matrix, as in \code{grpreg}. 18 | #' @param y The response vector (or matrix), as in \code{grpreg}. 19 | #' @param group The grouping vector, as in \code{grpreg}. 20 | #' @param family Either "gaussian", "binomial", or "poisson", depending on the response. 21 | #' @param nlambda The number of \code{lambda} values, as in \code{grpreg}. 22 | #' @param lambda A user supplied sequence of `lambda` values, as in `grpreg()`. 23 | #' @param lambda.min The smallest value for \code{lambda}, as in \code{grpreg}. 24 | #' @param lambda.max The maximum value for \code{lambda}. Unlike the penalties 25 | #' in \code{grpreg}, it is not possible to solve for \code{lambda.max} directly 26 | #' with group bridge models. Thus, it must be specified by the user.
If it is 27 | #' not specified, \code{gBridge} will attempt to guess \code{lambda.max}, but 28 | #' this is not particularly accurate. 29 | #' @param alpha Tuning parameter for the balance between the group penalty and 30 | #' the L2 penalty, as in \code{grpreg}. 31 | #' @param eps Convergence threshold, as in \code{grpreg}. 32 | #' @param delta The group bridge penalty is not differentiable at zero, and 33 | #' requires a small number \code{delta} to bound it away from zero. There is 34 | #' typically no need to change this value. 35 | #' @param max.iter Maximum number of iterations, as in \code{grpreg}. 36 | #' @param gamma Tuning parameter of the group bridge penalty (the exponent to 37 | #' which the L1 norm of the coefficients in the group is raised). Default is 38 | #' 0.5, the square root. 39 | #' @param group.multiplier The multiplicative factor by which each group's 40 | #' penalty is to be multiplied, as in \code{grpreg}. 41 | #' @param warn Should the function give a warning if it fails to converge? As 42 | #' in \code{grpreg}. 43 | #' @param returnX Return the standardized design matrix (and associated group 44 | #' structure information)? Default is FALSE. 45 | #' @param ... Not used. 46 | #' 47 | #' @return An object with S3 class \code{"grpreg"}, as in \code{grpreg}. 48 | #' 49 | #' @seealso [grpreg()] 50 | #' 51 | #' @references 52 | #' \itemize{ 53 | #' \item Huang J, Ma S, Xie H, and Zhang C. (2009) A group bridge approach for 54 | #' variable selection. *Biometrika*, **96**: 339-355. \doi{10.1093/biomet/asp020} 55 | #' 56 | #' \item Breheny P and Huang J. (2009) Penalized methods for bi-level variable 57 | #' selection. *Statistics and its interface*, **2**: 369-380. 58 | #' \doi{10.4310/sii.2009.v2.n3.a10} 59 | #' } 60 | #' 61 | #' @examples 62 | #' data(Birthwt) 63 | #' X <- Birthwt$X 64 | #' group <- Birthwt$group 65 | #' 66 | #' ## Linear regression 67 | #' y <- Birthwt$bwt 68 | #' fit <- gBridge(X, y, group, lambda.max=0.08) 69 | #' plot(fit) 70 | #' select(fit)$beta 71 | #' 72 | #' ## Logistic regression 73 | #' y <- Birthwt$low 74 | #' fit <- gBridge(X, y, group, family="binomial", lambda.max=0.17) 75 | #' plot(fit) 76 | #' select(fit)$beta 77 | #' @export 78 | 79 | gBridge <- function(X, y, group=1:ncol(X), family=c("gaussian", "binomial", "poisson"), nlambda=100, lambda, 80 | lambda.min={if (nrow(X) > ncol(X)) .001 else .05}, lambda.max, alpha=1, eps=.001, delta=1e-7, 81 | max.iter=10000, gamma=0.5, group.multiplier, warn=TRUE, returnX=FALSE, ...) { 82 | # Error checking 83 | family <- match.arg(family) 84 | if (alpha > 1 | alpha <= 0) stop("alpha must be in (0, 1]", call.=FALSE) 85 | if (any(is.na(y)) | any(is.na(X))) stop("Missing data (NA's) detected.
Take actions (e.g., removing cases, removing features, imputation) to eliminate missing data before passing X and y to gBridge", call.=FALSE) 86 | if (length(group)!=ncol(X)) stop("group does not match X", call.=FALSE) 87 | if (delta <= 0) stop("Delta must be a positive number", call.=FALSE) 88 | 89 | # Construct XG, yy 90 | yy <- newY(y, family) 91 | m <- attr(yy, "m") 92 | XG <- newXG(X, group, group.multiplier, m, TRUE) 93 | if (nrow(XG$X) != length(yy)) stop("X and y do not have the same number of observations", call.=FALSE) 94 | 95 | # Set up lambda 96 | if (missing(lambda)) { 97 | lambda <- setupLambda.gBridge(XG$X, yy, XG$g, family, alpha, lambda.min, lambda.max, nlambda, gamma, XG$m) 98 | } else { 99 | nlambda <- length(lambda) 100 | } 101 | 102 | # Fit 103 | n <- length(yy) 104 | p <- ncol(XG$X) 105 | K <- as.integer(table(XG$g)) 106 | K0 <- as.integer(if (min(XG$g)==0) K[1] else 0) 107 | K1 <- as.integer(if (min(XG$g)==0) cumsum(K) else c(0, cumsum(K))) 108 | if (family=="gaussian") { 109 | fit <- .Call("lcdfit_gaussian", XG$X, yy, "gBridge", K1, K0, lambda, alpha, eps, delta, gamma, 0, as.integer(max.iter), as.double(XG$m), as.integer(p), as.integer(max(XG$g)), as.integer(TRUE)) 110 | b <- rbind(mean(y), matrix(fit[[1]], nrow=p)) 111 | loss <- fit[[2]] 112 | Eta <- matrix(fit[[3]], nrow=n) + mean(y) 113 | df <- fit[[4]] + 1 # Intercept 114 | iter <- fit[[5]] 115 | } else { 116 | fit <- .Call("lcdfit_glm", XG$X, yy, family, "gBridge", K1, K0, lambda, alpha, eps, delta, gamma, 0, as.integer(max.iter), as.double(XG$m), as.integer(p), as.integer(max(XG$g)), as.integer(warn), as.integer(TRUE)) 117 | b <- rbind(fit[[1]], matrix(fit[[2]], nrow=p)) 118 | loss <- fit[[3]] 119 | Eta <- matrix(fit[[4]], nrow=n) 120 | df <- fit[[5]] 121 | iter <- fit[[6]] 122 | } 123 | 124 | # Eliminate saturated lambda values, if any 125 | ind <- !is.na(iter) 126 | b <- b[, ind, drop=FALSE] 127 | iter <- iter[ind] 128 | lambda <- lambda[ind] 129 | df <- df[ind] 130 | loss <- loss[ind] 131 | if (iter[1] == max.iter) stop("Algorithm failed to converge for any values of lambda. This indicates a combination of (a) an ill-conditioned feature matrix X and (b) insufficient penalization. 
You must fix one or the other for your model to be identifiable.", call.=FALSE) 132 | if (warn & any(iter==max.iter)) warning("Algorithm failed to converge for all values of lambda", call.=FALSE) 133 | 134 | # Unstandardize 135 | if (XG$reorder) b[-1,] <- b[1+XG$ord.inv,] 136 | beta <- unstandardize(b, XG) 137 | 138 | # Names 139 | varnames <- c("(Intercept)", XG$names) 140 | if (m > 1) { 141 | beta[2:m,] <- sweep(beta[2:m, , drop=FALSE], 2, beta[1,], FUN="+") 142 | beta <- array(beta, dim=c(m, nrow(beta)/m, ncol(beta))) 143 | group <- group[-(1:(m-1))] 144 | dimnames(beta) <- list(colnames(yy), varnames, round(lambda, digits=4)) 145 | } else { 146 | dimnames(beta) <- list(varnames, round(lambda, digits=4)) 147 | } 148 | 149 | val <- structure(list(beta = beta, 150 | family = family, 151 | group = group, 152 | lambda = lambda, 153 | alpha = alpha, 154 | deviance = 2 * loss, 155 | n = n, 156 | penalty = "gBridge", 157 | df = df, 158 | iter = iter, 159 | group.multiplier = XG$m), 160 | class = "grpreg") 161 | if (returnX) { 162 | val$XG = XG 163 | val$y = yy 164 | } else if (family=="poisson") { 165 | val$y <- y 166 | } 167 | val 168 | } 169 | -------------------------------------------------------------------------------- /R/gen_nonlinear_data.R: -------------------------------------------------------------------------------- 1 | #' Generate nonlinear example data 2 | #' 3 | #' Mainly intended to demonstrate the use of basis expansion models for sparse additive modeling; intended for use with [expand_spline()]. 4 | #' 5 | #' @param n Sample size (numeric; default = 100). 6 | #' @param p Number of features (numeric; default = 16). 7 | #' @param seed Set to get different random data sets, passed to [set.seed()] 8 | #' 9 | #' @examples 10 | #' Data <- gen_nonlinear_data() 11 | #' @export 12 | 13 | gen_nonlinear_data <- function(n=100, p=16, seed) { 14 | if (!missing(seed)) { 15 | original_seed <- .GlobalEnv$.Random.seed 16 | on.exit(.GlobalEnv$.Random.seed <- original_seed) 17 | set.seed(seed) 18 | } 19 | if (!(is.numeric(p) && p >= 6)) stop('p must be at least 6', call.=FALSE) 20 | X <- matrix(runif(n*p), nrow=n, ncol=p) 21 | w <- floor(log10(p)) + 1 22 | colnames(X) <- sprintf(paste0('V%0', w, 'd'), 1:p) 23 | f <- list( 24 | function(x){2*(exp(-10*x)-exp(-10))/(1-exp(-10)) - 1}, 25 | function(x){-2*(exp(-10*x)-exp(-10))/(1-exp(-10)) + 1}, 26 | function(x){2*x-1}, 27 | function(x){-2*x+1}, 28 | function(x){8*(x-0.5)^2 - 1}, 29 | function(x){-8*(x-0.5)^2 + 1}) 30 | eta <- matrix(NA, nrow=n, ncol=6) 31 | for (j in 1:6) eta[,j] <- f[[j]](X[,j]) 32 | mu <- 5 + apply(eta, 1, sum) 33 | y <- rnorm(n, mean=mu, sd=0.25) 34 | list(X=X, y=y, mu=mu) 35 | } 36 | -------------------------------------------------------------------------------- /R/grpreg-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | #' @aliases grpreg-package NULL 3 | #' @author Patrick Breheny 4 | #' 5 | #' @references 6 | #' \itemize{ 7 | #' \item Yuan M and Lin Y. (2006) Model selection and estimation in regression 8 | #' with grouped variables. *Journal of the Royal Statistical Society Series B*, 9 | #' **68**: 49-67. \doi{10.1111/j.1467-9868.2005.00532.x} 10 | #' 11 | #' \item Huang J, Ma S, Xie H, and Zhang C. (2009) A group bridge approach for 12 | #' variable selection. *Biometrika*, **96**: 339-355. \doi{10.1093/biomet/asp020} 13 | #' 14 | #' \item Breheny P and Huang J. (2009) Penalized methods for bi-level variable 15 | #' selection. 
*Statistics and its interface*, **2**: 369-380. 16 | #' \doi{10.4310/sii.2009.v2.n3.a10} 17 | #' 18 | #' \item Huang J, Breheny P, and Ma S. (2012). A selective review of group 19 | #' selection in high dimensional models. *Statistical Science*, **27**: 481-499. 20 | #' \doi{10.1214/12-sts392} 21 | #' 22 | #' \item Breheny P and Huang J. (2015) Group descent algorithms for nonconvex 23 | #' penalized linear and logistic regression models with grouped predictors. 24 | #' *Statistics and Computing*, **25**: 173-187. \doi{10.1007/s11222-013-9424-2} 25 | #' 26 | #' \item Breheny P. (2015) The group exponential lasso for bi-level variable 27 | #' selection. *Biometrics*, **71**: 731-740. \doi{10.1111/biom.12300} 28 | #' } 29 | #' 30 | #' @examples 31 | #' \donttest{vignette("getting-started", package="grpreg")} 32 | "_PACKAGE" 33 | 34 | #' @useDynLib grpreg, .registration = TRUE 35 | #' @import stats 36 | #' @import graphics 37 | #' @import grDevices 38 | #' @import utils 39 | #' @importFrom Matrix bdiag 40 | NULL 41 | -------------------------------------------------------------------------------- /R/logLik.R: -------------------------------------------------------------------------------- 1 | #' logLik method for grpreg 2 | #' 3 | #' Calculates the log likelihood and degrees of freedom for a fitted grpreg 4 | #' object. 5 | #' 6 | #' Exists mainly for use with [stats::AIC()] and [stats::BIC()]. 7 | #' 8 | #' @aliases logLik logLik.grpreg 9 | #' 10 | #' @param object A fitted `grpreg` or `grpsurv` object, as obtained from 11 | #' [grpreg()] or [grpsurv()] 12 | #' @param df.method How should effective model parameters be calculated? One 13 | #' of: `"active"`, which counts the number of nonzero coefficients; or 14 | #' `"default"`, which uses the calculated `df` returned by 15 | #' `grpreg`. Default is `"default"`. 16 | #' @param REML Use restricted MLE for estimation of the scale parameter in a 17 | #' gaussian model? Default is FALSE. 18 | #' @param ... For S3 method compatibility. 19 | #' 20 | #' @returns Returns an object of class 'logLik', in this case consisting of a 21 | #' number (or vector of numbers) with two attributes: 'df' (the estimated 22 | #' degrees of freedom in the model) and 'nobs' (number of observations). 23 | #' 24 | #' The 'print' method for 'logLik' objects is not intended to handle vectors; 25 | #' consequently, the value of the function does not necessarily display 26 | #' correctly. However, it works with 'AIC' and 'BIC' without any glitches and 27 | #' returns the expected vectorized output. 28 | #' 29 | #' @author Patrick Breheny 30 | #' @seealso [grpreg()] 31 | #' 32 | #' @examples 33 | #' data(Birthwt) 34 | #' X <- Birthwt$X 35 | #' y <- Birthwt$bwt 36 | #' group <- Birthwt$group 37 | #' fit <- grpreg(X,y,group,penalty="cMCP") 38 | #' logLik(fit) ## Display is glitchy for vectors 39 | #' AIC(fit) 40 | #' BIC(fit) 41 | #' @export 42 | 43 | logLik.grpreg <- function(object, df.method=c("default","active"), REML=FALSE, ...) 
{ 44 | df.method <- match.arg(df.method) 45 | n <- as.integer(object$n) 46 | df <- if (df.method=="active") apply(coef(object)!=0, 2, sum) else object$df 47 | if (object$family=="gaussian") { 48 | rdf <- if (REML) n-df else n 49 | RSS <- object$deviance 50 | l <- -n/2 * (log(2*pi) + log(RSS) - log(rdf)) - rdf/2 51 | df <- df + 1 52 | } else if (object$family=='poisson') { 53 | y <- object$y 54 | ind <- y != 0 55 | l <- -object$deviance/2 + sum(y[ind]*log(y[ind])) - sum(y) - sum(lfactorial(y)) 56 | } else { 57 | l <- -object$deviance/2 58 | } 59 | structure(l, df=df, nobs=n, class='logLik') 60 | } 61 | 62 | #' @rdname logLik.grpreg 63 | #' @export 64 | 65 | logLik.grpsurv <- function(object, df.method=c("default","active"), ...) { 66 | df.method <- match.arg(df.method) 67 | n <- as.integer(object$n) 68 | df <- if (df.method=="active") apply(coef(object)!=0, 2, sum) else object$df 69 | structure(-object$deviance/2, df=df, nobs=n, class='logLik') 70 | } 71 | -------------------------------------------------------------------------------- /R/loss.R: -------------------------------------------------------------------------------- 1 | deviance_grpreg <- function(y, yhat, family) { 2 | n <- length(y) 3 | if (family=="gaussian") { 4 | val <- (y - yhat)^2 5 | } else if (family=="binomial") { 6 | yhat[yhat < 0.00001] <- 0.00001 7 | yhat[yhat > 0.99999] <- 0.99999 8 | if (is.matrix(yhat)) { 9 | val <- matrix(NA, nrow=nrow(yhat), ncol=ncol(yhat)) 10 | if (sum(y==1)) val[y==1,] <- -2*log(yhat[y==1, , drop=FALSE]) 11 | if (sum(y==0)) val[y==0,] <- -2*log(1-yhat[y==0, , drop=FALSE]) 12 | } else { 13 | val <- double(length(y)) 14 | if (sum(y==1)) val[y==1] <- -2*log(yhat[y==1]) 15 | if (sum(y==0)) val[y==0] <- -2*log(1-yhat[y==0]) 16 | } 17 | } else if (family=="poisson") { 18 | yly <- y*log(y) 19 | yly[y==0] <- 0 20 | val <- 2*(yly - y + yhat - y*log(yhat)) 21 | } 22 | val 23 | } 24 | deviance_grpsurv <- function(y, eta, total=TRUE) { 25 | ind <- order(y[,1]) 26 | d <- as.integer(y[ind,2]) 27 | if (is.matrix(eta)) { 28 | eta <- eta[ind, , drop=FALSE] 29 | r <- apply(eta, 2, function(x) rev(cumsum(rev(exp(x))))) 30 | } else { 31 | eta <- as.matrix(eta[ind]) 32 | r <- as.matrix(rev(cumsum(rev(exp(eta))))) 33 | } 34 | if (total) { 35 | return(-2*(crossprod(d, eta) - crossprod(d, log(r)))) 36 | } else { 37 | return(-2*(eta[d==1, , drop=FALSE] - log(r)[d==1, , drop=FALSE])) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /R/multi.R: -------------------------------------------------------------------------------- 1 | multiX <- function(X, m) { 2 | p <- ncol(X) 3 | n <- nrow(X) 4 | A <- matrix(0, m*n, m*p) 5 | for (i in 1:m) { 6 | A[m*(1:n)-i+1, m*(1:p)-i+1] <- X 7 | } 8 | cbind(matrix(as.double(diag(m)), m*n, m, byrow=TRUE)[,2:m], A) 9 | } 10 | multiG <- function(g, ncolY) { 11 | structure(c(rep(0, ncolY-1), rep(g, each=ncolY)), 12 | levels=attr(g, 'levels'), 13 | m=attr(g, 'm')) 14 | } 15 | -------------------------------------------------------------------------------- /R/newS.R: -------------------------------------------------------------------------------- 1 | newS <- function(y) { 2 | ind <- order(y[,1]) 3 | list(time = as.double(y[ind,1]), 4 | fail = as.double(y[ind,2]), 5 | ind = ind) 6 | } 7 | -------------------------------------------------------------------------------- /R/newXG.R: -------------------------------------------------------------------------------- 1 | newXG <- function(X, g, m, ncolY, bilevel) { 2 | # Coerce X to matrix 3 | if 
(!inherits(X, "matrix")) { 4 | tmp <- try(X <- model.matrix(~0+., data=X), silent=TRUE) 5 | if (inherits(tmp, "try-error")) stop("X must be a matrix or able to be coerced to a matrix", call.=FALSE) 6 | } 7 | if (storage.mode(X)=="integer") storage.mode(X) <- "double" 8 | if (any(is.na(X))) stop("Missing data (NA's) detected in X. You must eliminate missing data (e.g., by removing cases, removing features, or imputation) before passing X to grpreg", call.=FALSE) 9 | if (length(g) != ncol(X)) stop ("Dimensions of group is not compatible with X", call.=FALSE) 10 | xnames <- if (is.null(colnames(X))) paste("V", 1:ncol(X), sep="") else colnames(X) 11 | 12 | # Setup group 13 | G <- setupG(g, m, bilevel) 14 | 15 | # Reconfigure for multiple outcomes, if necessary 16 | if (ncolY > 1) { 17 | X <- multiX(X, ncolY) 18 | G <- multiG(G, ncolY) 19 | } 20 | 21 | # Feature-level standardization 22 | std <- .Call("standardize", X) 23 | XX <- std[[1]] 24 | center <- std[[2]] 25 | scale <- std[[3]] 26 | nz <- which(scale > 1e-6) # non-constant columns 27 | if (length(nz) != ncol(X)) { 28 | XX <- XX[, nz, drop=FALSE] 29 | G <- subsetG(G, nz) 30 | } 31 | 32 | # Reorder groups, if necessary 33 | G <- reorderG(G, attr(G, 'm'), bilevel) 34 | if (attr(G, 'reorder')) XX <- XX[, attr(G, 'ord')] 35 | 36 | # Group-level standardization 37 | if (!bilevel) { 38 | XX <- orthogonalize(XX, G) 39 | g <- attr(XX, "group") 40 | } else { 41 | g <- as.integer(G) 42 | } 43 | 44 | # Set group multiplier if missing 45 | m <- attr(G, 'm') 46 | if (all(is.na(m))) { 47 | m <- if (bilevel) rep(1, max(g)) else sqrt(table(g[g!=0])) 48 | } 49 | 50 | # Return 51 | return(list(X=XX, g=g, m=m, reorder=attr(G, 'reorder'), ord.inv=attr(G, 'ord.inv'), names=xnames, 52 | center=center, scale=scale, nz=nz)) 53 | } 54 | -------------------------------------------------------------------------------- /R/newY.R: -------------------------------------------------------------------------------- 1 | newY <- function(y, family) { 2 | if (is.data.frame(y)) y <- as.matrix(y) 3 | if (is.matrix(y)) { 4 | d <- dim(y) 5 | y <- t(y) 6 | } else { 7 | d <- c(length(y), 1) 8 | } 9 | 10 | # Convert fuzzy binomial data 11 | if (family=="binomial" && typeof(y) != "logical") { 12 | tab <- table(y) 13 | if (length(tab) > 2) stop("Attemping to use family='binomial' with non-binary data", call.=FALSE) 14 | if (!identical(names(tab), c("0", "1"))) { 15 | message(paste0("Logistic regression modeling Pr(y=", names(tab)[2], ")")) 16 | y <- as.double(as.character(y) == names(tab)[2]) 17 | if (d[2] > 1) attr(y, "dim") <- d 18 | } 19 | } 20 | 21 | # Convert to double, if necessary 22 | if (typeof(y) != "double") { 23 | tryCatch(storage.mode(y) <- "double", warning=function(w) {stop("y must be numeric or able to be coerced to numeric", call.=FALSE)}) 24 | } 25 | if (any(is.na(y))) stop("Missing data (NA's) detected in outcome y. 
You must eliminate missing data (e.g., by removing cases or imputation) before passing y to grpreg", call.=FALSE) 26 | 27 | # Handle multi 28 | if (is.matrix(y)) { 29 | if (ncol(y) > 1) { 30 | if (is.null(colnames(y))) colnames(y) <- paste("Y", 1:ncol(y), sep="") 31 | } 32 | attributes(y) <- NULL 33 | } 34 | 35 | if (family=="gaussian") { 36 | meanY <- mean(y) 37 | y <- y - meanY 38 | attr(y, "mean") <- meanY 39 | } 40 | attr(y, "m") <- d[2] 41 | y 42 | } 43 | -------------------------------------------------------------------------------- /R/orthogonalize.R: -------------------------------------------------------------------------------- 1 | orthogonalize <- function(X, group) { 2 | n <- nrow(X) 3 | J <- max(group) 4 | T <- vector("list", J) 5 | XX <- matrix(0, nrow=nrow(X), ncol=ncol(X)) 6 | XX[, which(group==0)] <- X[, which(group==0)] 7 | for (j in seq_len(J)) { 8 | ind <- which(group==j) 9 | if (length(ind)==0) next 10 | SVD <- svd(X[, ind, drop=FALSE], nu=0) 11 | r <- which(SVD$d > 1e-10) 12 | T[[j]] <- sweep(SVD$v[, r, drop=FALSE], 2, sqrt(n)/SVD$d[r], "*") 13 | XX[, ind[r]] <- X[, ind] %*% T[[j]] 14 | } 15 | nz <- !apply(XX==0, 2, all) 16 | XX <- XX[, nz, drop=FALSE] 17 | attr(XX, "T") <- T 18 | attr(XX, "group") <- group[nz] 19 | XX 20 | } 21 | unorthogonalize <- function(b, XX, group, intercept=TRUE) { 22 | ind <- !sapply(attr(XX, "T"), is.null) 23 | T <- bdiag(attr(XX, "T")[ind]) 24 | if (intercept) { 25 | ind0 <- c(1, 1+which(group==0)) 26 | val <- Matrix::as.matrix(rbind(b[ind0, , drop=FALSE], T %*% b[-ind0, , drop=FALSE])) 27 | } else if (sum(group==0)) { 28 | ind0 <- which(group==0) 29 | val <- Matrix::as.matrix(rbind(b[ind0, , drop=FALSE], T %*% b[-ind0, , drop=FALSE])) 30 | } else { 31 | val <- as.matrix(T %*% b) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /R/plot-cv.R: -------------------------------------------------------------------------------- 1 | #' Plots the cross-validation curve from a \code{cv.grpreg} object 2 | #' 3 | #' Plots the cross-validation curve from a \code{cv.grpreg} object, along with 4 | #' standard error bars. 5 | #' 6 | #' Error bars representing approximate +/- 1 SE (68\% confidence intervals) are 7 | #' plotted along with the estimates at each value of \code{lambda}. For \code{rsq} 8 | #' and \code{snr}, these confidence intervals are quite crude, especially near 9 | #' zero, and will hopefully be improved upon in later versions of 10 | #' \code{grpreg}. 11 | #' 12 | #' @param x A \code{cv.grpreg} object. 13 | #' @param log.l Should horizontal axis be on the log scale? Default is TRUE. 14 | #' @param type What to plot on the vertical axis. \code{cve} plots the 15 | #' cross-validation error (deviance); \code{rsq} plots an estimate of the 16 | #' fraction of the deviance explained by the model (R-squared); \code{snr} 17 | #' plots an estimate of the signal-to-noise ratio; \code{scale} plots, for 18 | #' \code{family="gaussian"}, an estimate of the scale parameter (standard 19 | #' deviation); \code{pred} plots, for \code{family="binomial"}, the estimated 20 | #' prediction error; \code{all} produces all of the above. 21 | #' @param selected If \code{TRUE} (the default), places an axis on top of the 22 | #' plot denoting the number of groups in the model (i.e., that contain a 23 | #' nonzero regression coefficient) at that value of \code{lambda}. 24 | #' @param vertical.line If \code{TRUE} (the default), draws a vertical line at 25 | #' the value where cross-validation error is minimized.
26 | #' @param col Controls the color of the dots (CV estimates). 27 | #' @param \dots Other graphical parameters to \code{plot} 28 | #' 29 | #' @seealso [grpreg()], [cv.grpreg()] 30 | #' 31 | #' @examples 32 | #' \dontshow{set.seed(1)} 33 | #' # Birthweight data 34 | #' data(Birthwt) 35 | #' X <- Birthwt$X 36 | #' group <- Birthwt$group 37 | #' 38 | #' # Linear regression 39 | #' y <- Birthwt$bwt 40 | #' cvfit <- cv.grpreg(X, y, group) 41 | #' plot(cvfit) 42 | #' op <- par(mfrow=c(2,2)) 43 | #' plot(cvfit, type="all") 44 | #' 45 | #' ## Logistic regression 46 | #' y <- Birthwt$low 47 | #' cvfit <- cv.grpreg(X, y, group, family="binomial") 48 | #' par(op) 49 | #' plot(cvfit) 50 | #' par(mfrow=c(2,2)) 51 | #' plot(cvfit, type="all") 52 | #' @export 53 | 54 | plot.cv.grpreg <- function(x, log.l=TRUE, type=c("cve", "rsq", "scale", "snr", "pred", "all"), selected=TRUE, vertical.line=TRUE, col="red", ...) { 55 | type <- match.arg(type) 56 | if (type=="all") { 57 | plot(x, log.l=log.l, type="cve", selected=selected, ...) 58 | plot(x, log.l=log.l, type="rsq", selected=selected, ...) 59 | plot(x, log.l=log.l, type="snr", selected=selected, ...) 60 | if (length(x$fit$family)) { 61 | if (x$fit$family == "binomial") plot(x, log.l=log.l, type="pred", selected=selected, ...) 62 | if (x$fit$family == "gaussian") plot(x, log.l=log.l, type="scale", selected=selected, ...) 63 | } 64 | return(invisible(NULL)) 65 | } 66 | l <- x$lambda 67 | if (log.l) { 68 | l <- log(l) 69 | xlab <- expression(log(lambda)) 70 | } else xlab <- expression(lambda) 71 | 72 | ## Calculate y 73 | L.cve <- x$cve - x$cvse 74 | U.cve <- x$cve + x$cvse 75 | if (type=="cve") { 76 | y <- x$cve 77 | L <- L.cve 78 | U <- U.cve 79 | ylab <- "Cross-validation error" 80 | } else if (type=="rsq" | type == "snr") { 81 | if (length(x$fit$family) && x$fit$family=='gaussian') { 82 | rsq <- pmin(pmax(1 - x$cve/x$null.dev, 0), 1) 83 | rsql <- pmin(pmax(1 - U.cve/x$null.dev, 0), 1) 84 | rsqu <- pmin(pmax(1 - L.cve/x$null.dev, 0), 1) 85 | } else { 86 | rsq <- pmin(pmax(1 - exp(x$cve-x$null.dev), 0), 1) 87 | rsql <- pmin(pmax(1 - exp(U.cve-x$null.dev), 0), 1) 88 | rsqu <- pmin(pmax(1 - exp(L.cve-x$null.dev), 0), 1) 89 | } 90 | if (type == "rsq") { 91 | y <- rsq 92 | L <- rsql 93 | U <- rsqu 94 | ylab <- ~R^2 95 | } else if(type=="snr") { 96 | y <- pmin(rsq/(1-rsq), 1e6) 97 | L <- pmin(rsql/(1-rsql), 1e6) 98 | U <- pmin(rsqu/(1-rsqu), 1e6) 99 | if (max(c(y,L,U)) == 1e6) warning('Signal-to-noise ratio is infinite') 100 | ylab <- "Signal-to-noise ratio" 101 | } 102 | } else if (type=="scale") { 103 | if (x$fit$family == "binomial") stop("Scale parameter for binomial family fixed at 1", call.=FALSE) 104 | y <- sqrt(x$cve) 105 | L <- sqrt(L.cve) 106 | U <- sqrt(U.cve) 107 | ylab <- ~hat(sigma) 108 | } else if (type=="pred") { 109 | y <- x$pe 110 | n <- x$fit$n 111 | CI <- sapply(y, function(x) {binom.test(x*n, n, conf.level=0.68)$conf.int}) 112 | L <- CI[1,] 113 | U <- CI[2,] 114 | ylab <- "Prediction error" 115 | } 116 | 117 | ind <- if (type=="pred") is.finite(l[1:length(x$pe)]) else is.finite(l[1:length(x$cve)]) 118 | ylim <- range(c(L[ind], U[ind])) 119 | aind <- ((U-L)/diff(ylim) > 1e-3) & ind 120 | plot.args = list(x=l[ind], y=y[ind], ylim=ylim, xlab=xlab, ylab=ylab, type="n", xlim=rev(range(l[ind])), las=1, bty="n") 121 | new.args = list(...) 
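# Graphical parameters supplied through '...' override the defaults assembled in
# plot.args before the empty frame is drawn. Illustrative sketch (not run):
# plot(cvfit, bty="o", ylim=c(0, 2)) arrives here with
# new.args = list(bty="o", ylim=c(0, 2)), replacing those two entries of plot.args.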
122 | if (length(new.args)) plot.args[names(new.args)] = new.args 123 | do.call("plot", plot.args) 124 | if (vertical.line) abline(v=l[x$min], lty=2, lwd=.5) 125 | suppressWarnings(arrows(x0=l[aind], x1=l[aind], y0=L[aind], y1=U[aind], code=3, angle=90, col="gray80", length=.05)) 126 | points(l[ind], y[ind], col=col, pch=19, cex=.5) 127 | if (selected) { 128 | n.s <- sapply(predict(x$fit, lambda=x$lambda, type="groups"), length) 129 | axis(3, at=l, labels=n.s, tick=FALSE, line=-0.5) 130 | mtext("Groups selected", cex=0.8, line=1.5) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /R/plot-grpsurv-func.R: -------------------------------------------------------------------------------- 1 | #' Plot survival curve for grpsurv model 2 | #' 3 | #' Plot survival curve for a model that has been fit using \code{grpsurv} 4 | #' followed by a prediction of the survival function using 5 | #' \code{predict.grpsurv} 6 | #' 7 | #' 8 | #' @param x A \code{'grpsurv.func'} object, which is returned by 9 | #' \code{predict.grpsurv} if \code{type='survival'} is specified. See 10 | #' examples. 11 | #' @param alpha Controls alpha-blending (i.e., transparency). Useful if many 12 | #' overlapping lines are present. 13 | #' @param \dots Other graphical parameters to pass to \code{plot} 14 | #' @author Patrick Breheny 15 | #' @seealso \code{\link{grpsurv}}, \code{\link{predict.grpsurv}} 16 | #' @examples 17 | #' 18 | #' data(Lung) 19 | #' X <- Lung$X 20 | #' y <- Lung$y 21 | #' group <- Lung$group 22 | #' fit <- grpsurv(X, y, group) 23 | #' 24 | #' # A single survival curve 25 | #' S <- predict(fit, X[1,], type='survival', lambda=.05) 26 | #' plot(S, xlim=c(0,200)) 27 | #' 28 | #' # Lots of survival curves 29 | #' S <- predict(fit, X, type='survival', lambda=.05) 30 | #' plot(S, xlim=c(0,200), alpha=0.3) 31 | #' @export 32 | 33 | plot.grpsurv.func <- function(x, alpha=1, ...) { 34 | time <- attr(x, 'time') 35 | if (length(x) > 1) { 36 | Y <- sapply(x, function(f) f(time)) 37 | n <- ncol(Y) 38 | } else { 39 | Y <- x(time) 40 | n <- 1 41 | } 42 | 43 | plot.args <- list(x=1, y=1, xlim=range(time), ylim=range(Y), xlab='Time', ylab='Pr(Survival)', type="n", las=1, bty="n") 44 | new.args <- list(...) 45 | if (length(new.args)) plot.args[names(new.args)] <- new.args 46 | do.call("plot", plot.args) 47 | 48 | cols <- hcl(h=seq(15, 375, len=max(4, n+1)), l=60, c=150, alpha=alpha) 49 | cols <- if (n==2) cols[c(1,3)] else cols[1:n] 50 | line.args <- list(col=cols, lwd=1+2*exp(-n/20), lty=1, type='s') 51 | if (length(new.args)) line.args[names(new.args)] <- new.args 52 | line.args$x <- time 53 | line.args$y <- Y 54 | do.call("matlines", line.args) 55 | } 56 | -------------------------------------------------------------------------------- /R/plot.R: -------------------------------------------------------------------------------- 1 | #' Plot coefficients from a "grpreg" object 2 | #' 3 | #' Produces a plot of the coefficient paths for a fitted \code{grpreg} object. 4 | #' 5 | #' 6 | #' @param x Fitted \code{"grpreg"} model. 7 | #' @param alpha Controls alpha-blending. Default is alpha=1. 8 | #' @param legend.loc Where should the legend go? If left unspecified, no 9 | #' legend is drawn. See \code{\link[graphics]{legend}} for details. 10 | #' @param label If TRUE, annotates the plot with text labels in the right 11 | #' margin describing which variable/group the corresponding line belongs to. 12 | #' @param log.l Should horizontal axis be on the log scale? Default is FALSE. 
13 | #' @param norm If \code{TRUE}, plot the norm of each group, rather than the 14 | #' individual coefficients. 15 | #' @param \dots Other graphical parameters to \code{plot}, \code{matlines}, or 16 | #' \code{legend} 17 | #' 18 | #' @seealso [grpreg()] 19 | #' 20 | #' @examples 21 | #' # Fit model to birthweight data 22 | #' data(Birthwt) 23 | #' X <- Birthwt$X 24 | #' y <- Birthwt$bwt 25 | #' group <- Birthwt$group 26 | #' fit <- grpreg(X, y, group, penalty="grLasso") 27 | #' 28 | #' # Plot (basic) 29 | #' plot(fit) 30 | #' 31 | #' # Plot group norms, with labels in right margin 32 | #' plot(fit, norm=TRUE, label=TRUE) 33 | #' 34 | #' # Plot (miscellaneous options) 35 | #' myColors <- c("black", "red", "green", "blue", "yellow", "purple", 36 | #' "orange", "brown") 37 | #' plot(fit, legend.loc="topleft", col=myColors) 38 | #' labs <- c("Mother's Age", "# Phys. visits", "Hypertension", "Mother's weight", 39 | #' "# Premature", "Race", "Smoking", "Uterine irritability") 40 | #' plot(fit, legend.loc="topleft", lwd=6, alpha=0.5, legend=labs) 41 | #' plot(fit, norm=TRUE, legend.loc="topleft", lwd=6, alpha=0.5, legend=labs) 42 | #' @export 43 | 44 | plot.grpreg <- function(x, alpha=1, legend.loc, label=FALSE, log.l=FALSE, norm=FALSE, ...) { 45 | if (norm) { 46 | Y <- predict(x, type="norm") 47 | if (any(x$group==0)) Y <- Y[-1,] 48 | nonzero <- which(apply(abs(Y), 1, sum)!=0) 49 | Y <- Y[nonzero,] 50 | g <- 1:nrow(Y) 51 | } else { 52 | if (length(dim(x$beta))==3) { 53 | beta <- matrix(x$beta[, -1, , drop=FALSE], ncol=dim(x$beta)[3]) 54 | } else if (inherits(x, "grpsurv")) { 55 | beta <- x$beta 56 | } else { 57 | beta <- x$beta[-1, , drop=FALSE] 58 | } 59 | penalized <- which(x$group!=0) 60 | nonzero <- which(apply(abs(beta), 1, sum)!=0) 61 | ind <- intersect(penalized, nonzero) 62 | Y <- beta[ind, , drop=FALSE] 63 | g <- as.integer(as.factor(x$group[ind])) 64 | } 65 | p <- nrow(Y) 66 | l <- x$lambda 67 | n.g <- max(g) 68 | 69 | if (log.l) { 70 | l <- log(l) 71 | xlab <- expression(log(lambda)) 72 | } else xlab <- expression(lambda) 73 | 74 | plot.args <- list(x=l, y=1:length(l), ylim=range(Y), xlab=xlab, ylab="", type="n", xlim=rev(range(l)), las=1, bty="n") 75 | new.args <- list(...) 
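  # forward only arguments that plot.default() or par() recognize; line- and legend-specific arguments are handled separately below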
76 | if (length(new.args)) { 77 | new.plot.args <- new.args[names(new.args) %in% c(names(par()), names(formals(plot.default)))] 78 | plot.args[names(new.plot.args)] <- new.plot.args 79 | } 80 | do.call("plot", plot.args) 81 | if (plot.args$ylab=="") { 82 | ylab <- if (norm) expression("||"*hat(beta)*"||") else expression(hat(beta)) 83 | mtext(ylab, 2, 3.5, las=1, adj=0) 84 | } 85 | abline(h=0, lwd=0.5, col="gray") 86 | 87 | cols <- hcl(h=seq(15, 375, len=max(4, n.g+1)), l=60, c=150, alpha=alpha) 88 | cols <- if (n.g==2) cols[c(1,3)] else cols[1:n.g] 89 | line.args <- list(col=cols, lwd=1+2*exp(-p/20), lty=1, pch="") 90 | if (length(new.args)) line.args[names(new.args)] <- new.args 91 | line.args$x <- l 92 | line.args$y <- t(Y) 93 | line.args$col <- line.args$col[g] 94 | line.args$lty <- rep(line.args$lty, length.out=max(g)) 95 | line.args$lty <- line.args$lty[g] 96 | do.call("matlines", line.args) 97 | 98 | if(!missing(legend.loc)) { 99 | legend.args <- list(col=cols, lwd=line.args$lwd, lty=line.args$lty, legend=names(x$group.multiplier)) 100 | if (length(new.args)) { 101 | new.legend.args <- new.args[names(new.args) %in% names(formals(legend))] 102 | legend.args[names(new.legend.args)] <- new.legend.args 103 | } 104 | legend.args$x <- legend.loc 105 | do.call("legend", legend.args) 106 | } 107 | if (label) { 108 | ypos <- Y[, ncol(Y)] 109 | text(-0.001, ypos, names(ypos), xpd=NA, adj=c(0, NA)) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /R/predict-cv.R: -------------------------------------------------------------------------------- 1 | #' @rdname predict.grpreg 2 | #' @export 3 | 4 | predict.cv.grpreg <- function(object, X, lambda=object$lambda.min, which=object$min, type=c("link", "response", "class", "coefficients", "vars", "groups", "nvars", "ngroups", "norm"), ...) { 5 | type <- match.arg(type) 6 | if (inherits(object, 'cv.grpsurv')) { 7 | return(predict.grpsurv(object$fit, X=X, lambda=lambda, which=which, type=type, ...)) 8 | } else { 9 | return(predict.grpreg(object$fit, X=X, lambda=lambda, which=which, type=type, ...)) 10 | } 11 | } 12 | 13 | #' @rdname predict.grpreg 14 | #' @export 15 | 16 | coef.cv.grpreg <- function(object, lambda=object$lambda.min, which=object$min, ...) { 17 | coef.grpreg(object$fit, lambda=lambda, which=which, ...) 18 | } 19 | -------------------------------------------------------------------------------- /R/predict-surv.R: -------------------------------------------------------------------------------- 1 | #' Model predictions for grpsurv objects 2 | #' 3 | #' Similar to other predict methods, this function returns predictions from a fitted `grpsurv` object. 4 | #' 5 | #' Estimation of baseline survival function conditional on the estimated values of `beta` is carried out according to the method described in Chapter 4.3 of Kalbfleisch and Prentice. 6 | #' 7 | #' @param object Fitted `grpsurv` model object. 8 | #' @param X Matrix of values at which predictions are to be made. Not required for some `type` values. 9 | #' @param lambda Regularization parameter at which predictions are requested. For values of `lambda` not in the sequence of fitted models, linear interpolation is used. 10 | #' @param which Indices of the penalty parameter `lambda` at which predictions are required. Default: all indices. If `lambda` is specified, this will override `which`. 
11 | #' @param type Type of prediction: 12 | #' * `link`: linear predictors 13 | #' * `response`: risk (i.e., `exp(link)`) 14 | #' * `survival`: the estimated survival function 15 | #' * `hazard`: the estimated cumulative hazard function 16 | #' * `median`: median survival time 17 | #' * The other options are all identical to their [grpreg()] counterparts 18 | #' @param ... Not used. 19 | #' 20 | #' @return The object returned depends on `type`. 21 | #' 22 | #' @references 23 | #' * Kalbfleisch JD and Prentice RL (2002). The Statistical Analysis of Failure Time Data, 2nd edition. Wiley. 24 | #' 25 | #' @author Patrick Breheny 26 | #' 27 | #' @seealso [grpsurv()] 28 | #' 29 | #' @examples 30 | #' data(Lung) 31 | #' X <- Lung$X 32 | #' 33 | #' y <- Lung$y 34 | #' group <- Lung$group 35 | #' 36 | #' fit <- grpsurv(X, y, group) 37 | #' coef(fit, lambda=0.05) 38 | #' head(predict(fit, X, type="link", lambda=0.05)) 39 | #' head(predict(fit, X, type="response", lambda=0.05)) 40 | #' 41 | #' # Survival function 42 | #' S <- predict(fit, X[1,], type="survival", lambda=0.05) 43 | #' S(100) 44 | #' S <- predict(fit, X, type="survival", lambda=0.05) 45 | #' plot(S, xlim=c(0,200)) 46 | #' 47 | #' # Medians 48 | #' predict(fit, X[1,], type="median", lambda=0.05) 49 | #' M <- predict(fit, X, type="median") 50 | #' M[1:10, 1:10] 51 | #' 52 | #' # Nonzero coefficients 53 | #' predict(fit, type="vars", lambda=c(0.1, 0.01)) 54 | #' predict(fit, type="nvars", lambda=c(0.1, 0.01)) 55 | #' @export 56 | 57 | predict.grpsurv <- function(object, X, 58 | type=c("link", "response", "survival", "hazard", "median", "norm", 59 | "coefficients", "vars", "nvars", "groups", "ngroups"), 60 | lambda, which=1:length(object$lambda), ...) { 61 | type <- match.arg(type) 62 | if (type %in% c("norm", "coefficients", "vars", "nvars", "groups", "ngroups")) { 63 | return(predict.grpreg(object=object, X=X, type=type, lambda=lambda, which=which, ...)) 64 | } 65 | if (!missing(lambda)) { 66 | ind <- approx(object$lambda, seq(object$lambda), lambda)$y 67 | l <- floor(ind) 68 | r <- ceiling(ind) 69 | x <- ind %% 1 70 | beta <- (1-x)*object$beta[, l, drop=FALSE] + x*object$beta[, r, drop=FALSE] 71 | colnames(beta) <- round(lambda, 4) 72 | } else { 73 | beta <- object$beta[, which, drop=FALSE] 74 | } 75 | 76 | if (missing(X)) { 77 | eta <- matrix(0, 1, 1) 78 | warning('Returning "baseline" prediction; supply X for more interesting prediction') 79 | } else { 80 | if (inherits(object, "expanded")) X <- predict_spline(object, X) 81 | eta <- X %*% beta 82 | } 83 | if (type=='link') return(drop(eta)) 84 | if (type=='response') return(drop(exp(eta))) 85 | if (!missing(lambda)) { 86 | W <- (1-x)*exp(object$linear.predictors)[, l, drop=FALSE] + x*exp(object$linear.predictors)[, r, drop=FALSE] 87 | } else { 88 | W <- exp(object$linear.predictors)[, which, drop=FALSE] 89 | } 90 | 91 | if (type %in% c('survival', 'hazard') & ncol(W) > 1) stop('Can only return type="survival" or type="hazard" for a single lambda value', call.=FALSE) 92 | if (type %in% c('survival', 'hazard')) val <- vector('list', length(eta)) 93 | if (type == 'median') val <- matrix(NA, nrow(eta), ncol(eta)) 94 | for (j in 1:ncol(eta)) { 95 | # Estimate baseline hazard 96 | w <- W[,j] 97 | r <- rev(cumsum(rev(w))) 98 | a <- ifelse(object$fail, (1-w/r)^(1/w), 1) 99 | S0 <- c(1, cumprod(a)) 100 | H0 <- c(0, cumsum(1-a)) 101 | x <- c(0, object$time) 102 | for (i in 1:nrow(eta)) { 103 | S <- S0^exp(eta[i,j]) 104 | H <- H0*exp(eta[i,j]) 105 | if (type == 'survival') val[[i]] <- approxfun(x, S,
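      # step function in t, with jumps at the observed event times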
method='constant', ties="ordered") 106 | else if (type == 'hazard') val[[i]] <- approxfun(x, H, method='constant', ties="ordered") 107 | else if (type == 'median') { 108 | if (any(S < 0.5)) { 109 | val[i,j] <- x[min(which(S < .5))] 110 | } 111 | } 112 | } 113 | } 114 | if (type %in% c('survival', 'hazard')) { 115 | if (nrow(eta)==1) val <- val[[1]] 116 | class(val) <- c('grpsurv.func', class(val)) 117 | attr(val, 'time') <- object$time 118 | } else if (type == 'median') { 119 | val <- drop(val) 120 | } 121 | val 122 | } 123 | -------------------------------------------------------------------------------- /R/predict_spline.R: -------------------------------------------------------------------------------- 1 | predict_spline <- function(object, X) { 2 | p <- length(unique(object$group)) 3 | meta <- object$meta 4 | if (meta$type == 'bs') { 5 | df <- length(meta$knots[[1]]) + meta$degree 6 | } else if (meta$type == 'ns') { 7 | df <- length(meta$knots[[1]]) + 1 8 | } 9 | bsX <- matrix(NA, nrow(X), (p*df)) # make this work for vectors 10 | for(i in 1:p) { 11 | mat <- object$meta$X[,(df*(i-1)+1):(df*(i-1)+df)] 12 | attr(mat, "degree") <- meta$degree 13 | attr(mat, "knots") <- meta$knots[[i]] 14 | attr(mat, "Boundary.knots") <- meta$boundary[[i]] 15 | attr(mat, "intercept") <- FALSE 16 | attr(mat, "class") <- c(meta$type, "basis", "matrix") 17 | bsX[,(df*(i-1)+1):(df*(i-1)+df)] <- predict(mat, X[,i]) 18 | } 19 | bsX 20 | } 21 | -------------------------------------------------------------------------------- /R/residuals.R: -------------------------------------------------------------------------------- 1 | #' Extract residuals from a grpreg or grpsurv fit 2 | #' 3 | #' Currently, only deviance residuals are supported. 4 | #' 5 | #' @param object Object of class `grpreg` or `grpsurv`. 6 | #' @param lambda Values of the regularization parameter at which residuals are requested (numeric vector). For values of lambda not in the sequence of fitted models, linear interpolation is used. 7 | #' @param which Index of the penalty parameter at which residuals are requested (default = all indices). If `lambda` is specified, it takes precedence over `which`. 8 | #' @param drop By default, if a single value of lambda is supplied, a vector of residuals is returned (logical; default=`TRUE`). Set `drop=FALSE` if you wish the function to always return a matrix (see [drop()]). 9 | #' @param ... Not used. 10 | #' 11 | #' @examples 12 | #' data(Birthwt) 13 | #' X <- Birthwt$X 14 | #' y <- Birthwt$bwt 15 | #' group <- Birthwt$group 16 | #' fit <- grpreg(X, y, group, returnX=TRUE) 17 | #' residuals(fit)[1:5, 1:5] 18 | #' head(residuals(fit, lambda=0.1)) 19 | #' @export 20 | 21 | residuals.grpreg <- function(object, lambda, which=1:length(object$lambda), drop=TRUE, ...)
{ 22 | 23 | # Calculate matrix of residuals 24 | if (inherits(object, 'grpsurv')) { 25 | for (j in 1:length(object$lambda)) { 26 | h <- suppressWarnings(predict(object, which=j, type='hazard')(object$time)) 27 | M <- object$fail - h * exp(object$linear.predictors) 28 | R <- sign(M) * sqrt(-2*(M + object$fail*log(object$fail-M))) 29 | R[h==0,] <- 0 30 | R <- R[match(1:object$n, object$order),] # Return in original order 31 | } 32 | } else if (object$family == 'gaussian') { 33 | R <- object$y - object$linear.predictor 34 | } else if (object$family == 'binomial') { 35 | f <- binomial()$dev.resids 36 | M <- binomial()$linkinv(object$linear.predictor) 37 | R <- vapply(1:length(object$lambda), function(j) {dr(f, object$y, M[,j])}, double(length(object$y))) 38 | } else if (object$family == 'poisson') { 39 | f <- poisson()$dev.resids 40 | M <- poisson()$linkinv(object$linear.predictor) 41 | R <- vapply(1:length(object$lambda), function(j) {dr(f, object$y, M[,j])}, double(length(object$y))) 42 | } else { 43 | stop('Residuals not implemented for this type of grpreg object.') 44 | } 45 | 46 | # Interpolate and return 47 | if (!missing(lambda)) { 48 | ind <- approx(object$lambda, seq(object$lambda), lambda)$y 49 | l <- floor(ind) 50 | r <- ceiling(ind) 51 | w <- ind %% 1 52 | out <- (1-w)*R[, l, drop=FALSE] + w*R[, r, drop=FALSE] 53 | colnames(out) <- round(lambda, 4) 54 | } else { 55 | out <- R[, which, drop=FALSE] 56 | } 57 | if (drop) return(drop(out)) else return(out) 58 | } 59 | 60 | dr <- function(f, y, m) { 61 | sqrt(pmax(f(y, m, rep(1, length(y))), 0)) * ((y > m) * 2 - 1) 62 | } 63 | -------------------------------------------------------------------------------- /R/se-grpsurv.R: -------------------------------------------------------------------------------- 1 | # Bootstrap method for calculating CVSE for Cox models 2 | se_grpsurv <- function(y, eta, B = 100) { 3 | cve <- matrix(NA, B, ncol(eta)) 4 | for (b in 1:B) { 5 | ind <- sample(1:nrow(eta), replace=TRUE) 6 | cve[b,] <- deviance_grpsurv(y[ind,], eta[ind,]) 7 | } 8 | apply(cve, 2, sd) 9 | } 10 | -------------------------------------------------------------------------------- /R/select.R: -------------------------------------------------------------------------------- 1 | #' @rdname select 2 | #' @export 3 | select <- function(obj, ...) UseMethod("select") 4 | 5 | #' Select a value of lambda along a grpreg path 6 | #' 7 | #' Selects a point along the regularization path of a fitted grpreg object 8 | #' according to the AIC, BIC, GCV, AICc, or EBIC criteria. 9 | #' 10 | #' The criteria are defined as follows, where \eqn{L}{L} is the deviance (i.e., 11 | #' -2 times the log-likelihood), \eqn{\nu}{df} is the degrees of freedom, and 12 | #' \eqn{n}{n} is the sample size: 13 | #' 14 | #' \deqn{AIC = L + 2\nu}{AIC = L + 2*df} \deqn{BIC = L + \log(n)\nu}{BIC = L + 15 | #' log(n)*df} \deqn{GCV = \frac{L}{(1-\nu/n)^2}}{GCV = L/((1-df/n)^2)} 16 | #' \deqn{AICc = AIC + 2\frac{\nu(\nu+1)}{n-\nu-1}}{AICc = AIC + 17 | #' 2*df*(df+1)/(n-df-1)} \deqn{EBIC = BIC + 2 \log{p \choose \nu}}{EBIC = BIC + 18 | #' 2*log(p choose df)} 19 | #' 20 | #' @rdname select 21 | #' 22 | #' @param obj A fitted grpreg object. 23 | #' @param criterion The criterion by which to select the regularization 24 | #' parameter. One of \code{"AIC"}, \code{"BIC"}, \code{"GCV"}, \code{"AICc"}, 25 | #' or \code{"EBIC"}; default is \code{"BIC"}. 26 | #' @param df.method How should effective model parameters be calculated?
One 27 | #' of: \code{"active"}, which counts the number of nonzero coefficients; or 28 | #' \code{"default"}, which uses the calculated \code{df} returned by 29 | #' \code{grpreg}. Default is \code{"default"}. 30 | #' @param smooth Applies a smoother to the information criteria before 31 | #' selecting the optimal value. 32 | #' @param \dots For S3 method compatibility. 33 | #' 34 | #' @return A list containing: 35 | #' \describe{ 36 | #' \item{lambda}{The selected value of the regularization parameter, `lambda`.} 37 | #' \item{beta}{The vector of coefficients at the chosen value of `lambda`.} 38 | #' \item{df}{The effective number of model parameters at the chosen value of `lambda`.} 39 | #' \item{IC}{A vector of the calculated model selection criteria for each point on the regularization path.} 40 | #' } 41 | #' 42 | #' @seealso [grpreg()] 43 | #' 44 | #' @examples 45 | #' data(Birthwt) 46 | #' X <- Birthwt$X 47 | #' y <- Birthwt$bwt 48 | #' group <- Birthwt$group 49 | #' fit <- grpreg(X, y, group, penalty="grLasso") 50 | #' select(fit) 51 | #' select(fit,crit="AIC",df="active") 52 | #' plot(fit) 53 | #' abline(v=select(fit)$lambda) 54 | #' par(mfrow=c(1,3)) 55 | #' l <- fit$lambda 56 | #' xlim <- rev(range(l)) 57 | #' plot(l, select(fit)$IC, xlim=xlim, pch=19, type="o", ylab="BIC") 58 | #' plot(l, select(fit,"AIC")$IC, xlim=xlim, pch=19, type="o",ylab="AIC") 59 | #' plot(l, select(fit,"GCV")$IC, xlim=xlim, pch=19, type="o",ylab="GCV") 60 | #' @export 61 | 62 | select.grpreg <- function(obj, criterion=c("BIC","AIC","GCV","AICc","EBIC"), df.method=c("default","active"), smooth=FALSE, ...) { 63 | criterion <- match.arg(criterion) 64 | df.method <- match.arg(df.method) 65 | ll <- logLik(obj, df.method=df.method, ...) 66 | df <- as.double(attr(ll,"df")) 67 | d <- dim(obj$beta) 68 | p <- if (length(d)==2) d[1] - 1 else d[2] - 1 69 | j <- if(obj$family=="gaussian") df - 2 else df - 1 70 | 71 | IC <- switch(criterion, 72 | AIC = AIC(ll), 73 | BIC = BIC(ll), 74 | GCV = (1/obj$n) * (-2) * as.double(ll) / (1-df/obj$n)^2, 75 | AICc = AIC(ll) + 2*df*(df+1)/(obj$n-df-1), 76 | EBIC = BIC(ll) + 2*(lgamma(p+1) - lgamma(j+1) - lgamma(p-j+1))) 77 | n.l <- length(obj$lambda) 78 | if (smooth & (n.l < 4)) { 79 | smooth <- FALSE 80 | warning("Need at least 4 points to use smooth=TRUE", call.=FALSE) 81 | } 82 | if (smooth) { 83 | fit.ss <- smooth.spline(IC[is.finite(IC)]) 84 | d <- diff(fit.ss$y) 85 | if (all(d<0)) i <- n.l 86 | else i <- min(which(d>0))-1 87 | if (i==0) i <- 1 88 | } else i <- which.min(IC) 89 | 90 | if (min(obj$lambda) == obj$lambda[i]) { 91 | warning(paste("minimum lambda selected for", obj$penalty), call.=FALSE) 92 | } else if ((max(obj$lambda) == obj$lambda[i]) & obj$penalty=="gBridge") { 93 | warning("maximum lambda selected", call.=FALSE) 94 | } 95 | return(list(beta=obj$beta[,i], 96 | lambda=obj$lambda[i], 97 | df=df[i], 98 | IC=IC)) 99 | } 100 | 101 | -------------------------------------------------------------------------------- /R/setupLambda.R: -------------------------------------------------------------------------------- 1 | setupLambda <- function(X, y, group, family, penalty, alpha, lambda.min, log.lambda, nlambda, group.multiplier) { 2 | 3 | # Fit to unpenalized covariates 4 | n <- length(y) 5 | K <- table(group) 6 | K1 <- if (min(group)==0) cumsum(K) else c(0, cumsum(K)) 7 | storage.mode(K1) <- "integer" 8 | if (K1[1]!=0) { 9 | fit <- glm(y~X[, group==0], family=family) 10 | } else { 11 | fit <- glm(y~1, family=family) 12 | } 13 | 14 | ## Determine lambda.max 15 | if 
(family=="gaussian") { 16 | r <- fit$residuals 17 | } else { 18 | w <- fit$weights 19 | if (max(w) < 1e-4) stop("Unpenalized portion of model is already saturated; exiting...", call.=FALSE) 20 | r <- residuals(fit, "working")*w 21 | } 22 | if (strtrim(penalty, 2) == "gr") { 23 | zmax <- .Call("maxgrad", X, r, K1, as.double(group.multiplier)) / n 24 | } else { 25 | zmax <- .Call("maxprod", X, r, K1, as.double(group.multiplier)) / n 26 | } 27 | lambda.max <- zmax/alpha 28 | 29 | if (log.lambda) { # lambda sequence on log-scale 30 | if (lambda.min==0) { 31 | lambda <- c(exp(seq(log(lambda.max), log(.001*lambda.max), length=nlambda-1)), 0) 32 | } else { 33 | lambda <- exp(seq(log(lambda.max), log(lambda.min*lambda.max), length=nlambda)) 34 | } 35 | } else { # lambda sequence on linear-scale 36 | if (lambda.min==0) { 37 | lambda <- c(seq(lambda.max, 0.001*lambda.max, length = nlambda-1), 0) 38 | } else { 39 | lambda <- seq(lambda.max, lambda.min*lambda.max, length = nlambda) 40 | } 41 | } 42 | lambda 43 | } 44 | 45 | setupLambda.gBridge <- function(X, y, group, family, alpha, lambda.min, lambda.max, nlambda, gamma, group.multiplier) { 46 | ## Fit to unpenalized covariates 47 | n <- length(y) 48 | ind <- which(group!=0) 49 | if (length(ind)!=length(group)) { 50 | fit <- glm(y~X[, group==0], family=family) 51 | } else { 52 | fit <- glm(y~1, family=family) 53 | } 54 | 55 | ## Guess lambda.max 56 | if (missing(lambda.max)) { 57 | if (family=="gaussian") { 58 | z <- crossprod(X[, ind], fit$residuals) / n 59 | a <- .35 60 | } else { 61 | z <- crossprod(X[, ind], fit$weights * residuals(fit, "working")) / n 62 | a <- .2 63 | } 64 | lambda.max <- max(abs(z)/group.multiplier[group])*a^(1-gamma)/(gamma*alpha) 65 | } 66 | if (lambda.min==0) { 67 | lambda <- c(exp(seq(log(lambda.max), log(.001*lambda.max), len=nlambda-1)), 0) 68 | } else { 69 | lambda <- exp(seq(log(lambda.max), log(lambda.min*lambda.max), len=nlambda)) 70 | } 71 | return(rev(lambda)) 72 | } 73 | -------------------------------------------------------------------------------- /R/setupLambdaCox.R: -------------------------------------------------------------------------------- 1 | setupLambdaCox <- function(X, y, Delta, group, penalty, alpha, lambda.min, nlambda, group.multiplier) { 2 | n <- nrow(X) 3 | p <- ncol(X) 4 | 5 | ## Fit to unpenalized covariates 6 | K <- table(group) 7 | K1 <- as.integer(if (min(group)==0) cumsum(K) else c(0, cumsum(K))) 8 | if (K1[1]!=0) { 9 | SURV <- get("Surv", asNamespace("survival")) 10 | COXPH <- get("coxph", asNamespace("survival")) 11 | nullFit <- COXPH(SURV(y, Delta) ~ X[, group==0, drop=FALSE]) 12 | eta <- nullFit$linear.predictors 13 | rsk <- rev(cumsum(rev(exp(eta)))) 14 | s <- Delta - exp(eta)*cumsum(Delta/rsk) 15 | } else { 16 | w <- 1/(n-(1:n)+1) 17 | s <- Delta - cumsum(Delta*w) 18 | } 19 | 20 | ## Determine lambda.max 21 | if (strtrim(penalty, 2) == "gr") { 22 | zmax <- .Call("maxgrad", X, s, K1, as.double(group.multiplier)) / n 23 | } else { 24 | zmax <- .Call("maxprod", X, s, K1, as.double(group.multiplier)) / n 25 | } 26 | lambda.max <- zmax/alpha 27 | 28 | if (lambda.min==0) lambda <- c(exp(seq(log(lambda.max), log(.001*lambda.max), len=nlambda-1)), 0) 29 | else lambda <- exp(seq(log(lambda.max), log(lambda.min*lambda.max), len=nlambda)) 30 | lambda 31 | } 32 | -------------------------------------------------------------------------------- /R/standardize.R: -------------------------------------------------------------------------------- 1 | unstandardize <- function(b, XG) { 2 | beta <- 
matrix(0, nrow=1+length(XG$scale), ncol=ncol(b)) 3 | beta[1 + XG$nz,] <- b[-1,] / XG$scale[XG$nz] 4 | beta[1,] <- b[1,] - crossprod(XG$center, beta[-1, , drop=FALSE]) 5 | beta 6 | } 7 | -------------------------------------------------------------------------------- /R/summary-cv-grpreg.R: -------------------------------------------------------------------------------- 1 | #' Summarizing inferences based on cross-validation 2 | #' 3 | #' Summary method for \code{cv.grpreg} or \code{cv.grpsurv} objects 4 | #' 5 | #' 6 | #' @aliases summary.cv.grpreg print.summary.cv.grpreg 7 | #' @param object A \code{"cv.grpreg"} object. 8 | #' @param x A \code{"summary.cv.grpreg"} object. 9 | #' @param digits Number of digits past the decimal point to print out. Can be 10 | #' a vector specifying different display digits for each of the five 11 | #' non-integer printed values. 12 | #' @param \dots Further arguments passed to or from other methods. 13 | #' @return \code{summary(cvfit)} produces an object with S3 class 14 | #' \code{"summary.cv.grpreg"}. The class has its own print method and contains 15 | #' the following list elements: \item{penalty}{The penalty used by 16 | #' \code{grpreg}/\code{grpsurv}.} \item{model}{The type of model: 17 | #' \code{"linear"}, \code{"logistic"}, \code{"Poisson"}, \code{"Cox"}, etc.} 18 | #' \item{n}{Number of observations} \item{p}{Number of regression coefficients 19 | #' (not including the intercept).} \item{min}{The index of \code{lambda} with 20 | #' the smallest cross-validation error.} \item{lambda}{The sequence of 21 | #' \code{lambda} values used by \code{cv.grpreg}/\code{cv.grpsurv}.} 22 | #' \item{cve}{Cross-validation error (deviance).} \item{r.squared}{Proportion 23 | #' of variance explained by the model, as estimated by cross-validation.} 24 | #' \item{snr}{Signal to noise ratio, as estimated by cross-validation.} 25 | #' \item{sigma}{For linear regression models, the scale parameter estimate.} 26 | #' \item{pe}{For logistic regression models, the prediction error 27 | #' (misclassification error).} 28 | #' @author Patrick Breheny 29 | #' @seealso \code{\link{grpreg}}, \code{\link{cv.grpreg}}, 30 | #' \code{\link{cv.grpsurv}}, \code{\link{plot.cv.grpreg}} 31 | #' @examples 32 | #' 33 | #' # Birthweight data 34 | #' data(Birthwt) 35 | #' X <- Birthwt$X 36 | #' group <- Birthwt$group 37 | #' 38 | #' # Linear regression 39 | #' y <- Birthwt$bwt 40 | #' cvfit <- cv.grpreg(X, y, group) 41 | #' summary(cvfit) 42 | #' 43 | #' # Logistic regression 44 | #' y <- Birthwt$low 45 | #' cvfit <- cv.grpreg(X, y, group, family="binomial") 46 | #' summary(cvfit) 47 | #' 48 | #' # Cox regression 49 | #' data(Lung) 50 | #' cvfit <- with(Lung, cv.grpsurv(X, y, group)) 51 | #' summary(cvfit) 52 | #' @export 53 | 54 | summary.cv.grpreg <- function(object, ...) 
{ 55 | S <- pmax(object$null.dev - object$cve, 0) 56 | if (!inherits(object, 'cv.grpsurv') && object$fit$family=="gaussian") { 57 | rsq <- pmin(pmax(1 - object$cve/object$null.dev, 0), 1) 58 | } else { 59 | rsq <- pmin(pmax(1 - exp(object$cve-object$null.dev), 0), 1) 60 | } 61 | snr <- S/object$cve 62 | nvars <- predict(object$fit, type="nvars") 63 | ngroups <- predict(object$fit, type="ngroups") 64 | if (inherits(object, 'cv.grpsurv')) { 65 | model <- 'Cox' 66 | } else { 67 | model <- switch(object$fit$family, gaussian="linear", binomial="logistic", poisson="Poisson") 68 | } 69 | d <- dim(object$fit$beta) 70 | if (length(d)==3) { 71 | p <- d[2] - 1 72 | } else { 73 | if (model == 'Cox') { 74 | p <- d[1] 75 | } else { 76 | p <- d[1] - 1 77 | } 78 | } 79 | val <- list(penalty=object$fit$penalty, 80 | model=model, 81 | n=object$fit$n, 82 | p=p, 83 | min=object$min, 84 | lambda=object$lambda, 85 | cve=object$cve, 86 | r.squared=rsq, 87 | snr=snr, 88 | nvars=nvars, 89 | ngroups=ngroups, 90 | d=d) 91 | if (!inherits(object, 'cv.grpsurv') && object$fit$family=="gaussian") val$sigma <- sqrt(object$cve) 92 | if (!inherits(object, 'cv.grpsurv') && object$fit$family=="binomial") val$pe <- object$pe 93 | structure(val, class="summary.cv.grpreg") 94 | } 95 | 96 | #' @rdname summary.cv.grpreg 97 | #' @export 98 | 99 | print.summary.cv.grpreg <- function(x, digits, ...) { 100 | digits <- if (missing(digits)) digits <- c(2, 4, 2, 2, 3) else rep(digits, length.out=5) 101 | if (length(x$d)==3) { 102 | cat(x$penalty, "-penalized multivariate ", x$model, " regression with m=", x$d[1], ", n=", x$n/x$d[1], ", p=", x$p, "\n", sep="") 103 | } else { 104 | cat(x$penalty, "-penalized ", x$model, " regression with n=", x$n, ", p=", x$p, "\n", sep="") 105 | } 106 | cat("At minimum cross-validation error (lambda=", formatC(x$lambda[x$min], digits[2], format="f"), "):\n", sep="") 107 | cat("-------------------------------------------------\n") 108 | cat(" Nonzero coefficients: ", x$nvars[x$min], "\n", sep="") 109 | cat(" Nonzero groups: ", x$ngroups[x$min], "\n", sep="") 110 | cat(" Cross-validation error of ", formatC(min(x$cve), digits[1], format="f"), "\n", sep="") 111 | cat(" Maximum R-squared: ", formatC(max(x$r.squared), digits[3], format="f"), "\n", sep="") 112 | cat(" Maximum signal-to-noise ratio: ", formatC(max(x$snr), digits[4], format="f"), "\n", sep="") 113 | if (x$model == "logistic") cat(" Prediction error at lambda.min: ", formatC(x$pe[x$min], digits[5], format="f"), "\n", sep="") 114 | if (x$model == "linear") cat(" Scale estimate (sigma) at lambda.min: ", formatC(sqrt(x$cve[x$min]), digits[5], format="f"), "\n", sep="") 115 | } 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![GitHub version](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pbreheny/grpreg/master/.version.json&style=flat&logo=github)](https://github.com/pbreheny/grpreg) 3 | [![CRAN version](https://img.shields.io/cran/v/grpreg?logo=R)](https://cran.r-project.org/package=grpreg) 4 | [![downloads](https://cranlogs.r-pkg.org/badges/grpreg)](https://cran.r-project.org/package=grpreg) 5 | [![R-CMD-check](https://github.com/pbreheny/grpreg/workflows/R-CMD-check/badge.svg)](https://github.com/pbreheny/grpreg/actions) 6 | [![codecov.io](https://codecov.io/github/pbreheny/grpreg/coverage.svg?branch=master)](https://app.codecov.io/gh/pbreheny/grpreg) 7 | 8 | 9 | # [Regularization 
Paths for Regression Models with Grouped Covariates](https://pbreheny.github.io/grpreg/) 10 | 11 | `grpreg` is an R package for fitting the regularization path of linear regression, GLM, and Cox regression models with grouped penalties. This includes group selection methods such as group lasso, group MCP, and group SCAD as well as bi-level selection methods such as the group exponential lasso, the composite MCP, and the group bridge. Utilities for carrying out cross-validation as well as post-fitting visualization, summarization, and prediction are also provided. 12 | 13 | ### Install 14 | 15 | * To install the latest release version from CRAN: 16 | 17 | ``` r 18 | install.packages("grpreg") 19 | ``` 20 | 21 | * To install the latest development version from GitHub: 22 | 23 | ``` r 24 | remotes::install_github("pbreheny/grpreg") 25 | ``` 26 | 27 | ### Get started 28 | 29 | See the ["getting started" vignette](https://pbreheny.github.io/grpreg/articles/grpreg.html) 30 | 31 | ### Learn more 32 | 33 | Follow the links under "Articles" at the [grpreg website](https://pbreheny.github.io/grpreg/) 34 | 35 | ### References 36 | 37 | For more on the mathematical foundations and algorithmic details, see: 38 | 39 | * [Breheny, P. and Huang, J. (2009) Penalized methods for bi-level variable selection. *Statistics and its interface*, **2**: 369-380.](https://myweb.uiowa.edu/pbreheny/pdf/Breheny2009.pdf) 40 | * [Breheny, P. and Huang, J. (2015) Group descent algorithms for nonconvex penalized linear and logistic regression models with grouped predictors. *Statistics and Computing*, **25**: 173-187.](https://dx.doi.org/10.1007/s11222-013-9424-2) 41 | -------------------------------------------------------------------------------- /data/Birthwt.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbreheny/grpreg/287121410d7f196af1c078338fd437b773a34b0e/data/Birthwt.RData -------------------------------------------------------------------------------- /data/Lung.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbreheny/grpreg/287121410d7f196af1c078338fd437b773a34b0e/data/Lung.RData -------------------------------------------------------------------------------- /data/birthwt.grpreg.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pbreheny/grpreg/287121410d7f196af1c078338fd437b773a34b0e/data/birthwt.grpreg.RData -------------------------------------------------------------------------------- /grpreg.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: e2a34794-455e-4924-80b5-840c61557bd9 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: No 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: knitr 14 | LaTeX: pdfLaTeX 15 | 16 | BuildType: Package 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("If you use any of the L2-norm penalties (penalty='grLasso', 'grMCP', or 'grSCAD'), cite the Statistics and Computing article. 
If you use the group exponential lasso (penalty='gel'), cite the Biometrics article. If you use the composite MCP (penalty='cMCP') or group bridge penalties, cite the Statistics and its Interface (2009) article; note that the penalty is referred to as the 'group MCP' in the original article, but it would be better to refer to it as 'composite MCP' in future works.") 2 | 3 | bibentry( 4 | bibtype = "Article", 5 | author = c( 6 | person("Patrick", "Breheny"), 7 | person("Jian", "Huang") 8 | ), 9 | title = "Group descent algorithms for nonconvex penalized linear and logistic regression models with grouped predictors", 10 | journal = "Statistics and Computing", 11 | year = 2015, 12 | volume = 25, 13 | number = 2, 14 | pages = "173--187", 15 | doi = "10.1007/s11222-013-9424-2", 16 | url = "https://dx.doi.org/10.1007/s11222-013-9424-2") 17 | 18 | bibentry( 19 | bibtype = "Article", 20 | author = person("Patrick", "Breheny"), 21 | title = "The group exponential lasso for bi-level variable selection", 22 | journal = "Biometrics", 23 | year = 2015, 24 | volume = 71, 25 | number = 3, 26 | pages = "731--740", 27 | doi = "10.1111/biom.12300", 28 | url = "https://dx.doi.org/10.1111/biom.12300") 29 | 30 | bibentry( 31 | bibtype = "Article", 32 | author = c( 33 | person("Patrick", "Breheny"), 34 | person("Jian", "Huang") 35 | ), 36 | title = "Penalized methods for bi-level variable selection", 37 | journal = "Statistics and Its Interface", 38 | year = 2009, 39 | volume = 2, 40 | pages = "369--380", 41 | doi = "10.4310/sii.2009.v2.n3.a10") 42 | 43 | -------------------------------------------------------------------------------- /inst/tinytest/agreement.r: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(glmnet)) 2 | 3 | # gel reproduces lasso 4 | n <- 100 5 | group <- rep(1, 10) 6 | p <- length(group) 7 | X <- matrix(rnorm(n * p), ncol = p) 8 | y <- rnorm(n) 9 | yy <- rnorm(n) > 0 10 | gel <- coef(fit <- grpreg(X, y, group, penalty = "gel", tau = 0)) 11 | par(mfrow = c(2, 2)) 12 | plot(fit, log = TRUE) 13 | lasso <- as.matrix(coef(fit <- glmnet(X, y, lambda = fit$lambda))) 14 | plot(fit, "lambda") 15 | expect_equivalent(gel, lasso, tolerance = .01) 16 | gel <- coef(fit <- grpreg(X, yy, group, penalty = "gel", family = "binomial", tau = 0)) 17 | plot(fit, log = TRUE) 18 | lasso <- as.matrix(coef(fit <- glmnet(X, yy, family = "binomial", lambda = fit$lambda))) 19 | plot(fit, "lambda") 20 | expect_equivalent(gel, lasso, tolerance = .01) 21 | gel <- coef(fit <- grpreg(X, yy, group, penalty = "gel", family = "poisson", tau = 0)) 22 | plot(fit, log = TRUE) 23 | lasso <- as.matrix(coef(fit <- glmnet(X, yy, family = "poisson", lambda = fit$lambda))) 24 | plot(fit, "lambda") 25 | expect_equivalent(gel, lasso, tolerance = .01) 26 | 27 | # grLasso reproduces lasso 28 | n <- 50 29 | group <- 1:10 30 | p <- length(group) 31 | X <- matrix(rnorm(n * p), ncol = p) 32 | y <- rnorm(n) 33 | yy <- runif(n) > .5 34 | 35 | grLasso <- coef(fit <- grpreg(X, y, group, penalty = "grLasso")) 36 | par(mfrow = c(3, 2)) 37 | plot(fit, log = TRUE) 38 | fit1 <- fit 39 | lasso <- as.matrix(coef(fit <- glmnet(X, y, lambda = fit$lambda))) 40 | plot(fit, "lambda") 41 | fit2 <- fit 42 | expect_equivalent(grLasso, lasso, tolerance = .01) 43 | 44 | grLasso <- coef(fit <- grpreg(X, yy, group, penalty = "grLasso", family = "binomial")) 45 | plot(fit, log = TRUE) 46 | lasso <- as.matrix(coef(fit <- glmnet(X, yy, family = "binomial", lambda = fit$lambda))) 47 | plot(fit, 
"lambda") 48 | expect_equivalent(grLasso, lasso, tolerance = .01) 49 | grLasso <- coef(fit <- grpreg(X, yy, group, penalty = "grLasso", family = "poisson")) 50 | plot(fit, log = TRUE) 51 | lasso <- as.matrix(coef(fit <- glmnet(X, yy, family = "poisson", lambda = fit$lambda))) 52 | plot(fit, "lambda") 53 | expect_equivalent(grLasso, lasso, tolerance = .01) 54 | 55 | 56 | # grMCP and grSCAD reproduce MCP and SCAD lasso 57 | n <- 50 58 | group <- 1:10 59 | p <- length(group) 60 | X <- matrix(rnorm(n * p), ncol = p) 61 | y <- rnorm(n) 62 | grMCP <- coef(fit <- grpreg(X, y, group, penalty = "grMCP", gamma = 3)) 63 | par(mfrow = c(2, 2)) 64 | plot(fit) 65 | mcp <- coef(fit <- ncvreg::ncvreg(X, y, lambda = fit$lambda, penalty = "MCP", gamma = 3)) 66 | plot(fit) 67 | expect_equivalent(grMCP, mcp, tolerance = .01) 68 | grSCAD <- coef(fit <- grpreg(X, y, group, penalty = "grSCAD", gamma = 4)) 69 | plot(fit) 70 | scad <- coef(fit <- ncvreg::ncvreg(X, y, lambda = fit$lambda, penalty = "SCAD", gamma = 4)) 71 | plot(fit) 72 | expect_equivalent(grSCAD, scad, tolerance = .01) 73 | -------------------------------------------------------------------------------- /inst/tinytest/auc.r: -------------------------------------------------------------------------------- 1 | if (interactive()) library(tinytest) 2 | library(survival, quietly = TRUE) 3 | 4 | # The quantities here are not exactly the same thing (one is in-sample, the 5 | # other is out-of-sample), but they should be close 6 | n <- 500 7 | X <- matrix(rnorm(n * 10), n, 10) 8 | y <- Surv(rexp(n, rate = exp(X[, 1])), rbinom(n, 1, prob = 0.8)) 9 | cvfit <- cv.grpsurv(X, y, lambda.min = 0, returnY = TRUE) 10 | a <- AUC(cvfit) 11 | a[length(a)] 12 | fit <- coxph(y ~ X) 13 | b <- concordancefit(y, -predict(fit))$concordance 14 | expect_equivalent(a[length(a)], b, tol = 0.02) 15 | -------------------------------------------------------------------------------- /inst/tinytest/binomial.r: -------------------------------------------------------------------------------- 1 | # grpreg reproduces simple logistic regression 2 | n <- 20 3 | p <- 1 4 | X <- matrix(rnorm(n * p), ncol = p) 5 | y <- runif(n) > .5 6 | group <- 1 7 | reg <- glm(y ~ X, family = "binomial")$coef 8 | nlam <- 100 9 | par(mfcol = c(3, 2)) 10 | gel <- coef( 11 | fit <- grpreg( 12 | X, 13 | y, 14 | group, 15 | penalty = "gel", 16 | nlambda = nlam, 17 | lambda.min = 0, 18 | family = "binomial", 19 | eps = 1e-10 20 | ) 21 | )[, nlam] 22 | plot(fit, main = fit$penalty) 23 | expect_equivalent(gel, reg, tolerance = 1e-7) 24 | cMCP <- coef( 25 | fit <- grpreg( 26 | X, 27 | y, 28 | group, 29 | penalty = "cMCP", 30 | nlambda = nlam, 31 | lambda.min = 0, 32 | family = "binomial", 33 | gamma = 9, 34 | eps = 1e-10 35 | ) 36 | )[, nlam] 37 | plot(fit, main = fit$penalty) 38 | expect_equivalent(cMCP, reg, tolerance = 1e-7) 39 | bridge <- coef( 40 | fit <- gBridge(X, y, group, lambda.min = 0, nlambda = nlam, family = "binomial", eps = 1e-10) 41 | )[, 1] 42 | plot(fit, main = fit$penalty) 43 | expect_equivalent(bridge, reg, tolerance = 1e-7) 44 | grLasso <- coef( 45 | fit <- grpreg( 46 | X, 47 | y, 48 | group, 49 | penalty = "grLasso", 50 | nlambda = nlam, 51 | lambda.min = 0, 52 | family = "binomial", 53 | eps = 1e-10 54 | ) 55 | )[, nlam] 56 | plot(fit, main = fit$penalty) 57 | expect_equivalent(grLasso, reg, tolerance = 1e-7) 58 | grMCP <- coef( 59 | fit <- grpreg( 60 | X, 61 | y, 62 | group, 63 | penalty = "grMCP", 64 | nlambda = nlam, 65 | lambda.min = 0, 66 | family = "binomial", 67 | eps = 1e-10 68 | ) 69 
| )[, nlam] 70 | plot(fit, main = fit$penalty) 71 | expect_equivalent(grMCP, reg, tolerance = 1e-7) 72 | grSCAD <- coef( 73 | fit <- grpreg( 74 | X, 75 | y, 76 | group, 77 | penalty = "grSCAD", 78 | nlambda = nlam, 79 | lambda.min = 0, 80 | family = "binomial", 81 | eps = 1e-10 82 | ) 83 | )[, nlam] 84 | plot(fit, main = fit$penalty) 85 | expect_equivalent(grSCAD, reg, tolerance = 1e-7) 86 | 87 | # grpreg() reproduces logistic regression 88 | n <- 100 89 | group <- rep(0:3, 1:4) 90 | p <- length(group) 91 | X <- matrix(rnorm(n * p), ncol = p) 92 | y <- runif(n) > .5 93 | fit.mle <- glm(y ~ X, family = "binomial") 94 | reg <- coef(fit.mle) 95 | par(mfcol = c(3, 2)) 96 | gel <- coef( 97 | fit <- grpreg(X, y, group, penalty = "gel", family = "binomial", eps = 1e-10, lambda.min = 0) 98 | )[, 100] 99 | plot(fit, main = fit$penalty) 100 | expect_equivalent(gel, reg, tol = 1e-6) 101 | cMCP <- coef( 102 | fit <- grpreg(X, y, group, penalty = "cMCP", family = "binomial", eps = 1e-10, lambda.min = 0) 103 | )[, 100] 104 | plot(fit, main = fit$penalty) 105 | expect_equivalent(cMCP, reg, tol = 1e-6) 106 | bridge <- coef(fit <- gBridge(X, y, group, family = "binomial", eps = 1e-10, lambda.min = 0))[, 1] 107 | plot(fit, main = fit$penalty) 108 | expect_equivalent(bridge, reg, tol = 1e-6) 109 | grLasso <- coef( 110 | fit <- grpreg(X, y, group, penalty = "grLasso", family = "binomial", eps = 1e-10, lambda.min = 0) 111 | )[, 100] 112 | plot(fit, main = fit$penalty) 113 | expect_equivalent(grLasso, reg, tol = 1e-6) 114 | grMCP <- coef( 115 | fit <- grpreg( 116 | X, 117 | y, 118 | group, 119 | penalty = "grMCP", 120 | family = "binomial", 121 | gamma = 2, 122 | eps = 1e-10, 123 | lambda.min = 0 124 | ) 125 | )[, 100] 126 | plot(fit, main = fit$penalty) 127 | expect_equivalent(grMCP, reg, tol = 1e-6) 128 | grSCAD <- coef( 129 | fit <- grpreg( 130 | X, 131 | y, 132 | group, 133 | penalty = "grSCAD", 134 | family = "binomial", 135 | gamma = 2.1, 136 | eps = 1e-10, 137 | lambda.min = 0 138 | ) 139 | )[, 100] 140 | plot(fit, main = fit$penalty) 141 | expect_equivalent(grSCAD, reg, tol = 1e-6) 142 | expect_equivalent(predict(fit, X)[, 100], predict(fit.mle), tol = 1e-6) 143 | expect_equivalent( 144 | predict(fit, X, type = "response")[, 100], 145 | predict(fit.mle, type = "response"), 146 | tol = 1e-6 147 | ) 148 | -------------------------------------------------------------------------------- /inst/tinytest/coerce.r: -------------------------------------------------------------------------------- 1 | # expect_equalcoersion of y 2 | char_y <- matrix(LETTERS[1:2], 4, 3) 3 | logi_y <- matrix(rep(c(TRUE, FALSE), each = 6), 4, 3) 4 | int_y <- matrix(1:12, 4, 3) 5 | num_y <- matrix(1.0 * 1:12, 4, 3) 6 | tools::assertError(grpreg:::newY(char_y, "gaussian")) 7 | tools::assertError(grpreg:::newY(num_y, "binomial")) 8 | tools::assertError(grpreg:::newY(int_y, "binomial")) 9 | grpreg:::newY(logi_y, "binomial") 10 | suppressMessages(grpreg:::newY(char_y, "binomial")) 11 | grpreg:::newY(num_y, "gaussian") 12 | grpreg:::newY(int_y, "gaussian") 13 | suppressMessages(grpreg:::newY(char_y[, 1], "binomial")) 14 | 15 | # expect_equalcoersion of X, y 16 | data(Birthwt, package = "grpreg") 17 | X <- data.frame(Birthwt$X) 18 | y <- factor(Birthwt$low, labels = c("No", "Yes")) 19 | suppressMessages(fit <- grpreg(X, y, family = "binomial")) 20 | 21 | # expect_equalcoersion of group 22 | y <- Birthwt$low 23 | g1 <- Birthwt$group 24 | g2 <- as.numeric(factor(g1)) 25 | g3 <- as.numeric(factor(g1, levels = sort(levels(g1)))) ## Tests 
reordering 26 | fit1 <- grpreg(X, y, group = g1, family = "binomial") 27 | fit2 <- grpreg(X, y, group = g2, family = "binomial") 28 | fit3 <- grpreg(X, y, group = g3, family = "binomial") 29 | expect_equal(coef(fit1, which = 50), coef(fit2, which = 50), tol = 0.001) 30 | expect_equal(coef(fit2, which = 50), coef(fit3, which = 50), tol = 0.001) 31 | expect_equal(coef(fit1, which = 50), coef(fit3, which = 50), tol = 0.001) 32 | -------------------------------------------------------------------------------- /inst/tinytest/cv-grpsurv.r: -------------------------------------------------------------------------------- 1 | n <- 50 2 | group <- rep(0:3, 4:1) 3 | p <- length(group) 4 | X <- matrix(rnorm(n * p), ncol = p) 5 | y <- cbind(rexp(n, exp(X[, 5] + X[, 7])), rep(0:1, c(10, n - 10))) 6 | cvfit <- cv.grpsurv(X, y) 7 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 8 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 9 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso", nfolds = 50) 10 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel", nfolds = 50) 11 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso", se = "bootstrap") 12 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel", se = "bootstrap") 13 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso", se = "bootstrap") 14 | 15 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 16 | op <- par(mfrow = c(2, 2)) 17 | plot(cvfit) 18 | plot(cvfit, type = "rsq") 19 | plot(cvfit, type = "snr") 20 | par(op) 21 | summary(cvfit) 22 | -------------------------------------------------------------------------------- /inst/tinytest/cv.r: -------------------------------------------------------------------------------- 1 | if (interactive()) library(tinytest) 2 | 3 | # Gaussian 4 | n <- 50 5 | group <- rep(0:4, 5:1) 6 | p <- length(group) 7 | X <- matrix(rnorm(n * p), ncol = p) 8 | y <- rnorm(n) 9 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 10 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 11 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso", fold = 1:50) 12 | cvfit <- cv.grpreg(X, y, group, penalty = "gel", fold = 1:50) 13 | plot(cvfit, type = "all") 14 | 15 | # Binomial 16 | n <- 50 17 | group <- rep(0:3, 4:1) 18 | p <- length(group) 19 | X <- matrix(rnorm(n * p), ncol = p) 20 | y <- runif(n) > 0.5 21 | cvfit <- cv.grpreg(X, y, group, family = "binomial", penalty = "grLasso") 22 | cvfit <- cv.grpreg(X, y, group, family = "binomial", penalty = "gel") 23 | cvfit <- cv.grpreg(X, y, group, family = "binomial", penalty = "grLasso", fold = 1:50) 24 | cvfit <- cv.grpreg(X, y, group, family = "binomial", penalty = "gel", fold = 1:50) 25 | plot(cvfit, type = "all") 26 | 27 | # Poisson 28 | n <- 50 29 | group <- rep(0:3, 4:1) 30 | p <- length(group) 31 | X <- matrix(rnorm(n * p), ncol = p) 32 | y <- sample(1:n) 33 | cvfit <- cv.grpreg(X, y, group, family = "poisson", penalty = "grLasso") 34 | cvfit <- cv.grpreg(X, y, group, family = "poisson", penalty = "gel") 35 | cvfit <- cv.grpreg(X, y, group, family = "poisson", penalty = "grLasso", fold = 1:50) 36 | cvfit <- cv.grpreg(X, y, group, family = "poisson", penalty = "gel", fold = 1:50) 37 | plot(cvfit, type = "all") 38 | summary(cvfit) 39 | 40 | # Multitask learning 41 | n <- 50 42 | p <- 10 43 | m <- 4 44 | X <- matrix(rnorm(n * p), ncol = p) 45 | Y <- matrix(rnorm(n * m), ncol = m) 46 | cvfit <- cv.grpreg(X, Y) 47 | cvfit <- cv.grpreg(X, Y, nfolds = 50) 48 | Y <- matrix(rnorm(n * m), ncol = m) > 0 49 | cvfit <- cv.grpreg(X, Y, family = "binomial") 50 | cvfit <- cv.grpreg(X, Y, 
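  # nfolds equal to n (50 here), i.e. leave-one-out cross-validation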
family = "binomial", nfolds = 50) 51 | plot(cvfit, type = "all") 52 | 53 | # p > n 54 | n <- 75 55 | p <- 200 56 | X <- matrix(rnorm(n * p), n, p) 57 | mu <- exp(apply(X[, 1:10], 1, sum) * 0.5) 58 | y <- rpois(n, mu) 59 | g <- rep(LETTERS[1:20], each = 10) 60 | cvfit <- cv.grpreg(X, y, group = g) 61 | plot(cvfit, type = "all") 62 | cvfit <- cv.grpreg(X, y > 0, group = g, family = "binomial") 63 | plot(cvfit, type = "all") 64 | cvfit <- cv.grpreg(X, y, group = g, family = "poisson") 65 | plot(cvfit, type = "all") 66 | 67 | # summary 68 | set.seed(4) 69 | n <- 75 70 | p <- 200 71 | X <- matrix(rnorm(n * p), n, p) 72 | y <- rpois(n, 1) 73 | g <- rep(LETTERS[1:20], each = 10) 74 | cvfit <- cv.grpreg(X, y, group = g) 75 | s <- summary(cvfit) 76 | expect_equivalent(s$ngroups[1], 0) 77 | 78 | # predict 79 | expect_equivalent( 80 | predict(cvfit, X[1:3, ]), 81 | predict(cvfit$fit, X[1:3, ], lambda = cvfit$lambda.min) 82 | ) 83 | 84 | 85 | # R squared --------------------------------------------------------------- 86 | 87 | # Gaussian 88 | n <- 5000 89 | group <- rep(0:4, 5:1) 90 | p <- length(group) 91 | X <- matrix(rnorm(n * p), ncol = p) 92 | y <- rnorm(n, mean = X[, 6]) 93 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso", lambda.min = 0) 94 | summary(cvfit, lambda = 0)$r.squared 95 | summary(lm(y ~ ., as.data.frame(X)))$r.squared 96 | 97 | 98 | # Return Y ---------------------------------------------------------------- 99 | 100 | n <- 50 101 | group <- rep(0:4, 5:1) 102 | p <- length(group) 103 | X <- matrix(rnorm(n * p), ncol = p) 104 | y <- rnorm(n) 105 | cvfit <- cv.grpreg(X, y, group, returnY = TRUE) 106 | expect_equivalent( 107 | cvfit$cve, 108 | apply((cvfit$Y - y)^2, 2, mean) 109 | ) 110 | -------------------------------------------------------------------------------- /inst/tinytest/gBridge.r: -------------------------------------------------------------------------------- 1 | # gBridge reproduces linear regression 2 | n <- 50 3 | p <- 10 4 | X <- matrix(rnorm(n * p), ncol = p) 5 | y <- rnorm(n) 6 | group <- rep(0:3, 4:1) 7 | fit.mle <- lm(y ~ X) 8 | reg <- coef(fit.mle) 9 | fit <- gBridge(X, y, group, lambda.min = 0, eps = 1e-10) 10 | expect_equivalent(coef(fit, which = 1), coef(fit.mle)) 11 | expect_silent(plot(fit)) 12 | 13 | # CV 14 | cvfit <- cv.grpreg(X, y, group, lambda.min = 0, gBridge = TRUE) 15 | plot(cvfit) 16 | summary(cvfit) 17 | 18 | # Should probably have more tests for logistic regression, etc. 
19 | -------------------------------------------------------------------------------- /inst/tinytest/gaussian.r: -------------------------------------------------------------------------------- 1 | # Simple linear regression ------------------------------------------------ 2 | 3 | n <- 5 4 | p <- 1 5 | X <- matrix(rnorm(n * p), ncol = p) 6 | y <- rnorm(n) 7 | group <- 1 8 | reg <- lm(y ~ X)$coef 9 | nlam <- 100 10 | par(mfcol = c(3, 2)) 11 | gel <- coef(fit <- grpreg(X, y, group, penalty = "gel", nlambda = nlam, lambda.min = 0))[, nlam] 12 | plot(fit, main = fit$penalty) 13 | expect_equivalent(gel, reg, tolerance = 1e-7) 14 | cMCP <- coef(fit <- grpreg(X, y, group, penalty = "cMCP", nlambda = nlam, lambda.min = 0))[, nlam] 15 | plot(fit, main = fit$penalty) 16 | expect_equivalent(cMCP, reg, tolerance = 1e-7) 17 | bridge <- coef(fit <- gBridge(X, y, group, nlambda = nlam, lambda.min = 0))[, 1] 18 | plot(fit, main = fit$penalty) 19 | expect_equivalent(bridge, reg, tolerance = 1e-7) 20 | grLasso <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", nlambda = nlam, lambda.min = 0))[, 21 | nlam 22 | ] 23 | plot(fit, main = fit$penalty) 24 | expect_equivalent(grLasso, reg, tolerance = 1e-7) 25 | grMCP <- coef(fit <- grpreg(X, y, group, penalty = "grMCP", nlambda = nlam, lambda.min = 0))[, nlam] 26 | plot(fit, main = fit$penalty) 27 | expect_equivalent(grMCP, reg, tolerance = 1e-7) 28 | grSCAD <- coef(fit <- grpreg(X, y, group, penalty = "grSCAD", nlambda = nlam, lambda.min = 0))[, 29 | nlam 30 | ] 31 | plot(fit, main = fit$penalty) 32 | expect_equivalent(grSCAD, reg, tolerance = 1e-7) 33 | 34 | 35 | # Linear regression ------------------------------------------------------- 36 | 37 | n <- 50 38 | p <- 10 39 | X <- matrix(rnorm(n * p), ncol = p) 40 | y <- rnorm(n) 41 | group <- rep(0:3, 4:1) 42 | fit.mle <- lm(y ~ X) 43 | reg <- coef(fit.mle) 44 | nlam <- 100 45 | par(mfcol = c(3, 2)) 46 | gel <- coef( 47 | fit <- grpreg(X, y, group, penalty = "gel", nlambda = nlam, lambda.min = 0, eps = 1e-10) 48 | )[, nlam] 49 | plot(fit, main = fit$penalty) 50 | expect_equivalent(gel, reg, tolerance = 1e-7) 51 | cMCP <- coef(fit <- grpreg(X, y, group, penalty = "cMCP", lambda.min = 0, eps = 1e-10))[, 100] 52 | plot(fit, main = fit$penalty) 53 | expect_equivalent(cMCP, reg, tolerance = 1e-7) 54 | bridge <- coef(fit <- gBridge(X, y, group, lambda.min = 0, eps = 1e-10))[, 1] 55 | plot(fit, main = fit$penalty) 56 | expect_equivalent(bridge, reg, tolerance = 1e-7) 57 | grLasso <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-10))[, 100] 58 | plot(fit, main = fit$penalty) 59 | expect_equivalent(grLasso, reg, tolerance = 1e-7) 60 | grMCP <- coef(fit <- grpreg(X, y, group, penalty = "grMCP", lambda.min = 0, eps = 1e-10))[, 100] 61 | plot(fit, main = fit$penalty) 62 | expect_equivalent(grMCP, reg, tolerance = 1e-7) 63 | grSCAD <- coef(fit <- grpreg(X, y, group, penalty = "grSCAD", lambda.min = 0, eps = 1e-10))[, 100] 64 | plot(fit, main = fit$penalty) 65 | expect_equivalent(grSCAD, reg, tolerance = 1e-7) 66 | expect_equivalent(predict(fit, X)[, 100], predict(fit.mle), tolerance = 1e-7) 67 | plot(fit, norm = TRUE, label = TRUE) 68 | plot(fit, legend.loc = "topleft") 69 | -------------------------------------------------------------------------------- /inst/tinytest/grpsurv.r: -------------------------------------------------------------------------------- 1 | if (interactive()) library(tinytest) 2 | suppressPackageStartupMessages(library(survival)) 3 | 4 | # Test that grpsurv works when 
x has 1 column 5 | y <- Surv(rexp(50), sample(rep(0:1, c(10, 40)))) 6 | X <- matrix(rnorm(50 * 1), 50, 1) 7 | g <- 1 8 | fit <- grpsurv(X, y, g, lambda.min = 0) 9 | 10 | # Test that grpsurv works 11 | y <- Surv(rexp(50), sample(rep(0:1, c(10, 40)))) 12 | X <- matrix(rnorm(50 * 6), 50, 6) 13 | g <- rep(1:3, each = 2) 14 | fit <- grpsurv(X, y, g, lambda.min = 0) 15 | 16 | # $ grpsurv equals MLE when lam=0 17 | fit.mle <- coxph(y ~ X) 18 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 19 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "grSCAD") 20 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 21 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "grMCP") 22 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 23 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "gel") 24 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 25 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "cMCP") 26 | expect_equivalent(coef(fit)[, 100], coef(fit.mle), tol = 0.01) 27 | 28 | # grpsurv returns correct logLik 29 | expect_equivalent(logLik(fit)[100], logLik(fit.mle)[1], tol = 0.1) 30 | expect_equivalent(AIC(fit)[100], AIC(fit.mle), tol = 0.1) 31 | 32 | # grpsurv returns correct linear predictors 33 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "grLasso", eps = 1e-12) 34 | expect_equivalent(fit$linear.predictors[, 100], fit.mle$linear.predictors[order(y)]) 35 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "gel", eps = 1e-12) 36 | expect_equivalent(fit$linear.predictors[, 100], fit.mle$linear.predictors[order(y)]) 37 | 38 | # residuals are correct (slightly different at final observation because baseline hazard estimated differently) 39 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "grLasso", eps = 1e-12) 40 | r1 <- residuals(fit, lambda = 0) 41 | r2 <- residuals(fit.mle, type = "deviance") 42 | plot(r1, r2) 43 | abline(0, 1) 44 | expect_equivalent( 45 | residuals(fit, lambda = 0), 46 | residuals(fit.mle, type = "deviance"), 47 | tolerance = 0.1 48 | ) 49 | fit <- grpsurv(X, y, g, lambda.min = 0, penalty = "gel", eps = 1e-12) 50 | expect_equivalent( 51 | residuals(fit, lambda = 0), 52 | residuals(fit.mle, type = "deviance"), 53 | tolerance = 0.1 54 | ) 55 | 56 | # predict works for grpsurv 57 | head(predict(fit, X, "vars")) 58 | head(predict(fit, X, "nvars")) 59 | head(predict(fit, X, "groups")) 60 | head(predict(fit, X, "ngroups")) 61 | head(predict(fit, X, "link")) 62 | head(predict(fit, X, "response")) 63 | head(predict(fit, X, "coef")) 64 | head(predict(fit, X, "median")) 65 | S <- predict(fit, X, "survival", lambda = 0.1) 66 | plot(S) 67 | S <- predict(fit, X[1, ], "survival", lambda = 0.1) 68 | plot(S) 69 | H <- predict(fit, X, "hazard", lambda = 0.1) 70 | plot(H) 71 | H <- predict(fit, X[1, ], "hazard", lambda = 0.1) 72 | plot(H) 73 | 74 | # Survival curves vs survival package 75 | DF <- as.data.frame(X) 76 | fit.mle <- coxph(y ~ ., DF) 77 | S <- predict(fit, X[1, ], "survival", lambda = 0) 78 | plot(S) 79 | lines(survfit(fit.mle, DF[1, ], conf.int = FALSE)) 80 | H <- predict(fit, X[1, ], "hazard", lambda = 0) 81 | plot(H) 82 | lines(survfit(fit.mle, DF[1, ]), conf.int = FALSE, cumhaz = TRUE) 83 | 84 | # Cumulative hazard 85 | h1 <- H(fit$time) 86 | h2 <- survfit(fit.mle, DF[1, ])$cumhaz 87 | plot(h1, h2) 88 | abline(0, 1) 89 | 90 | # penalty factor works for grpsurv 91 | fit <- grpsurv(X, y, g, group.multiplier = c(1:3)) 92 | 93 | # coersion works for grpsurv 94 | fit <- grpsurv(as.data.frame(X), y) 95 | 96 | # loss 
works for grpsurv 97 | eta <- predict(fit, X, "link", lambda = 0.1) 98 | grpreg:::deviance_grpsurv(y, eta) 99 | grpreg:::deviance_grpsurv(y, eta, total = FALSE) 100 | 101 | # cross-validation works for grpsurv 102 | cvfit <- cv.grpsurv(X, y, g, lambda.min = 0) 103 | plot(cvfit) 104 | summary(cvfit) 105 | -------------------------------------------------------------------------------- /inst/tinytest/multitask.r: -------------------------------------------------------------------------------- 1 | n <- 100 2 | p <- 5 3 | 4 | # multitask learning works 5 | X <- matrix(rnorm(n * p), ncol = p) 6 | Y <- matrix(rnorm(n * 3), ncol = 3) 7 | colnames(X) <- LETTERS[1:p] 8 | fit <- grpreg(X, Y, penalty = "grLasso") 9 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 10 | fit <- grpreg(X, Y, penalty = "cMCP") 11 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 12 | fit <- gBridge(X, Y) 13 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 14 | 15 | # multitask learning works (logistic regression) 16 | fit <- grpreg(X, Y > 0, family = "binomial", penalty = "grLasso", lambda.min = 0.4) 17 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 18 | fit <- grpreg(X, Y > 0, family = "binomial", penalty = "cMCP", lambda.min = 0.4) 19 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 20 | fit <- gBridge(X, Y > 0, family = "binomial", lambda.min = 0.4) 21 | expect_equivalent(dim(fit$beta), c(3, p + 1, 100)) 22 | 23 | # coef/predict work for multitask learning 24 | fit <- grpreg(X, Y) 25 | l <- fit$lambda[20] 26 | coef(fit, which = 1:2) 27 | coef(fit, lambda = l) 28 | predict(fit, lambda = l, type = "nvars") 29 | predict(fit, which = c(30, 60), type = "nvars") 30 | predict(fit, lambda = l, type = "ngroups") 31 | predict(fit, which = c(30, 60), type = "ngroups") 32 | predict(fit, lambda = l, type = "groups") 33 | predict(fit, which = c(30, 60), type = "groups") 34 | predict(fit, lambda = l, type = "norm") 35 | predict(fit, which = c(30, 60), type = "norm") 36 | head(predict(fit, X, lambda = l)) 37 | predict(fit, X, which = c(30, 60))[1:10, , ] 38 | 39 | # coef/predict work for multitask learning (logistic regression) 40 | fit <- grpreg(X, Y > 0, family = "binomial") 41 | l <- fit$lambda[20] 42 | predict(fit, lambda = l, type = "nvars") 43 | predict(fit, lambda = l, type = "ngroups") 44 | predict(fit, lambda = l, type = "groups") 45 | predict(fit, lambda = l, type = "norm") 46 | head(predict(fit, X, lambda = l)) 47 | head(predict(fit, X, lambda = l, type = "response")) 48 | head(predict(fit, X, lambda = l, type = "class")) 49 | 50 | # cross-validation for multitask learning works 51 | cvfit <- cv.grpreg(X, Y) 52 | plot(cvfit) 53 | summary(cvfit) 54 | cvfit <- cv.grpreg(X, Y, penalty = "cMCP") 55 | plot(cvfit) 56 | summary(cvfit) 57 | 58 | # cross-validation for multitask learning works (logistic regression) 59 | cvfit <- cv.grpreg(X, Y > 0, family = "binomial") 60 | plot(cvfit) 61 | summary(cvfit) 62 | cvfit <- cv.grpreg(X, Y > 0, family = "binomial", penalty = "cMCP") 63 | plot(cvfit) 64 | summary(cvfit) 65 | 66 | # multitask learning reproduces linear regression 67 | fit.mle <- lm(Y ~ X) 68 | reg <- coef(fit.mle) 69 | cMCP <- coef(fit <- grpreg(X, Y, penalty = "cMCP", lambda.min = 0), which = 100) 70 | expect_equivalent(t(cMCP), reg, tolerance = 0.01) 71 | p <- predict(fit, X, which = 100) 72 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 73 | bridge <- coef(fit <- gBridge(X, Y, lambda.min = 0), which = 1) 74 | expect_equivalent(t(bridge), reg, tolerance = .01) 75 | p <- predict(fit, X, 
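  # which = 1 is the least-penalized end of the gBridge path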
which = 1) 76 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 77 | grLasso <- coef(fit <- grpreg(X, Y, penalty = "grLasso", lambda.min = 0), which = 100) 78 | expect_equivalent(t(grLasso), reg, tolerance = .01) 79 | p <- predict(fit, X, which = 100) 80 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 81 | grMCP <- coef(fit <- grpreg(X, Y, penalty = "grMCP", lambda.min = 0), which = 100) 82 | expect_equivalent(t(grMCP), reg, tolerance = .01) 83 | p <- predict(fit, X, which = 100) 84 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 85 | grSCAD <- coef(fit <- grpreg(X, Y, penalty = "grSCAD", lambda.min = 0), which = 100) 86 | expect_equivalent(t(grSCAD), reg, tolerance = .01) 87 | p <- predict(fit, X, which = 100) 88 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 89 | 90 | # multitask learning reproduces logistic regression 91 | n <- 50 92 | p <- 2 93 | X <- matrix(rnorm(n * p), ncol = p) 94 | Y <- matrix(rnorm(n * 3), ncol = 3) > 0 95 | fit.mle <- glm(Y[, 3] ~ X, family = binomial) 96 | mle <- coef(fit.mle) 97 | beta <- coef(fit <- grpreg(X, Y, lambda.min = 0, family = "binomial"), which = 100)[3, ] 98 | expect_equivalent(beta, mle, tolerance = .01) 99 | p <- predict(fit, X, which = 100)[, 3] 100 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 101 | p <- predict(fit, X, which = 100, type = "response")[, 3] 102 | expect_equivalent(p, predict(fit.mle, type = "response"), tolerance = .01) 103 | bridge <- coef(fit <- gBridge(X, Y, family = "binomial", lambda.min = 0), which = 1)[3, ] 104 | expect_equivalent(bridge, mle, tolerance = .01) 105 | p <- predict(fit, X, which = 1)[, 3] 106 | expect_equivalent(p, predict(fit.mle), tolerance = .01) 107 | p <- predict(fit, X, which = 1, type = "response")[, 3] 108 | expect_equivalent(p, predict(fit.mle, type = "response"), tolerance = .01) 109 | -------------------------------------------------------------------------------- /inst/tinytest/noiseless.r: -------------------------------------------------------------------------------- 1 | X <- rbind(diag(8), -diag(8)) * sqrt(8) 2 | beta <- c(-1, -1, -1, -1, 2, 2, 2, 2) 3 | y <- as.numeric(X %*% beta) 4 | group <- c(1, 1, 1, 1, 2, 2, 2, 2) 5 | 6 | fit <- grpreg(X, y, group, lambda = c(2, 1.999999, 1, 0)) 7 | expect_equal(predict(fit, type = "nvars", which = 2), 4) 8 | expect_equivalent(coef(fit, lambda = 0), c(0, beta)) 9 | expect_equivalent(coef(fit, lambda = 1), c(0, 0, 0, 0, 0, 1, 1, 1, 1)) 10 | -------------------------------------------------------------------------------- /inst/tinytest/poisson.r: -------------------------------------------------------------------------------- 1 | # grpreg() reproduces poisson regression 2 | n <- 50 3 | group <- rep(0:3, 1:4) 4 | p <- length(group) 5 | X <- matrix(rnorm(n * p), ncol = p) 6 | y <- sample(1:5, n, replace = TRUE) 7 | fit.mle <- glm(y ~ X, family = "poisson") 8 | reg <- coef(fit.mle) 9 | par(mfcol = c(3, 2)) 10 | gel <- coef( 11 | fit <- grpreg(X, y, group, penalty = "gel", family = "poisson", eps = 1e-10, lambda.min = 0) 12 | )[, 100] 13 | plot(fit, main = fit$penalty) 14 | expect_equivalent(gel, reg, tolerance = 1e-7) 15 | cMCP <- coef( 16 | fit <- grpreg(X, y, group, penalty = "cMCP", family = "poisson", eps = 1e-10, lambda.min = 0) 17 | )[, 100] 18 | plot(fit, main = fit$penalty) 19 | expect_equivalent(cMCP, reg, tolerance = 1e-7) 20 | bridge <- coef(fit <- gBridge(X, y, group, family = "poisson", eps = 1e-10, lambda.min = 0))[, 1] 21 | plot(fit, main = fit$penalty) 22 | expect_equivalent(bridge, reg, 
tolerance = 1e-7) 23 | grLasso <- coef( 24 | fit <- grpreg(X, y, group, penalty = "grLasso", family = "poisson", eps = 1e-10, lambda.min = 0) 25 | )[, 100] 26 | plot(fit, main = fit$penalty) 27 | expect_equivalent(grLasso, reg, tolerance = 1e-7) 28 | grMCP <- coef( 29 | fit <- grpreg( 30 | X, 31 | y, 32 | group, 33 | penalty = "grMCP", 34 | family = "poisson", 35 | gamma = 2, 36 | eps = 1e-10, 37 | lambda.min = 0 38 | ) 39 | )[, 100] 40 | plot(fit, main = fit$penalty) 41 | expect_equivalent(grMCP, reg, tolerance = 1e-7) 42 | grSCAD <- coef( 43 | fit <- grpreg( 44 | X, 45 | y, 46 | group, 47 | penalty = "grSCAD", 48 | family = "poisson", 49 | gamma = 2.1, 50 | eps = 1e-10, 51 | lambda.min = 0 52 | ) 53 | )[, 100] 54 | plot(fit, main = fit$penalty) 55 | expect_equivalent(grSCAD, reg, tolerance = 1e-7) 56 | expect_equivalent(predict(fit, X)[, 100], predict(fit.mle), tolerance = 1e-7) 57 | expect_equivalent( 58 | predict(fit, X, type = "response")[, 100], 59 | predict(fit.mle, type = "response"), 60 | tolerance = 1e-7 61 | ) 62 | -------------------------------------------------------------------------------- /inst/tinytest/select.r: -------------------------------------------------------------------------------- 1 | if (interactive()) library(tinytest) 2 | 3 | data(Birthwt) 4 | X <- Birthwt$X 5 | y <- Birthwt$bwt 6 | group <- Birthwt$group 7 | fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0) 8 | lmfit <- lm(y ~ X) 9 | 10 | # BIC 11 | expect_silent(sb <- select(fit)) 12 | expect_equal(sb$IC[100], BIC(lmfit), tolerance = 1e-4) 13 | 14 | # AIC 15 | expect_warning(sa <- select(fit, criterion = "AIC", df.method = "active")) 16 | expect_silent(sa <- select(fit, criterion = "AIC")) 17 | expect_equivalent(sa$IC[100], AIC(lmfit), tolerance = 1e-4) 18 | 19 | # GCV 20 | expect_silent(s <- select(fit, criterion = "GCV", smooth = TRUE)) 21 | 22 | # AICc 23 | expect_silent(s <- select(fit, criterion = "AICc")) 24 | expect_true(all(s$IC >= sa$IC)) 25 | 26 | # EBIC 27 | expect_silent(s <- select(fit, criterion = "EBIC")) 28 | expect_true(all(s$IC >= sb$IC)) 29 | -------------------------------------------------------------------------------- /inst/tinytest/standardization-ortho.r: -------------------------------------------------------------------------------- 1 | n <- 20 2 | p <- 5 3 | l <- 5 4 | group <- c(1, 1, 2, 2, 2) 5 | zgroup <- rep(0:1, 2:3) 6 | 7 | # standardize() standardizes correctly 8 | X <- matrix(rnorm(n * p), ncol = p) 9 | XX <- .Call("standardize", X)[[1]] 10 | expect_equal(apply(XX, 2, mean), rep(0, 5)) 11 | expect_equal(apply(XX, 2, crossprod), rep(20, 5)) 12 | 13 | # unstandardize() unstandardizes correctly 14 | X <- matrix(rnorm(n * p), ncol = p) 15 | std <- .Call("standardize", X) 16 | XX <- std[[1]] 17 | center <- std[[2]] 18 | scale <- std[[3]] 19 | bb <- matrix(rnorm(l * (p + 1)), nrow = p + 1) 20 | b <- grpreg:::unstandardize(bb, list(center = center, scale = scale, nz = which(scale > 1e-10))) 21 | expect_equal(cbind(1, XX) %*% bb, cbind(1, X) %*% b) 22 | 23 | # orthogonalize() orthogonalizes correctly 24 | X <- matrix(rnorm(n * p), ncol = p) 25 | XX <- grpreg:::orthogonalize(X, group) 26 | for (j in 1:group[p]) { 27 | ind <- which(group == j) 28 | expect_equal(crossprod(XX[, ind]) / n, diag(length(ind))) 29 | } 30 | 31 | # unorthogonalize() unorthogonalizes correctly 32 | X <- matrix(rnorm(n * p), ncol = p) 33 | XX <- grpreg:::orthogonalize(X, group) 34 | bb <- matrix(rnorm(l * (p + 1)), nrow = p + 1) 35 | b <- grpreg:::unorthogonalize(bb, XX, attr(XX, "group")) 36 
| expect_equal(cbind(1, XX) %*% bb, cbind(1, X) %*% b) 37 | 38 | # unorthogonalize() unorthogonalizes correctly w/o intercept 39 | X <- matrix(rnorm(n * p), ncol = p) 40 | XX <- grpreg:::orthogonalize(X, zgroup) 41 | bb <- matrix(rnorm(l * (p)), nrow = p) 42 | b <- grpreg:::unorthogonalize(bb, XX, attr(XX, "group"), intercept = FALSE) 43 | expect_equal(XX %*% bb, X %*% b) 44 | 45 | # orthogonalize() orthogonalizes correctly w/o full rank 46 | X <- matrix(rnorm(n * p), ncol = p) 47 | X[, 5] <- X[, 4] 48 | XX <- grpreg:::orthogonalize(X, group) 49 | for (j in 1:group[p]) { 50 | ind <- which(attr(XX, "group") == j) 51 | expect_equal(crossprod(XX[, ind]) / n, diag(length(ind))) 52 | } 53 | y <- rnorm(nrow(X)) 54 | fit <- grpreg(X, y, group = LETTERS[group]) 55 | 56 | # orthogonalize() orthogonalizes correctly w/ 0's present and non-full-rank 57 | X <- matrix(rnorm(n * p), ncol = p) 58 | X[, 4] <- 0 59 | X[, 5] <- X[, 3] 60 | grp <- grpreg:::newXG(X, group, rep(1, max(group)), 1, FALSE) 61 | XX <- grp$X 62 | g <- grp$g 63 | for (j in 1:max(g)) { 64 | ind <- which(g == j) 65 | expect_equal(crossprod(XX[, ind]) / n, diag(length(ind))) 66 | } 67 | y <- rnorm(nrow(X)) 68 | fit <- grpreg(X, y, group = LETTERS[group]) 69 | -------------------------------------------------------------------------------- /inst/tinytest/surv-torture.r: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(survival)) 2 | 3 | # single lambda 4 | n <- 50 5 | group <- rep(0:3, 4:1) 6 | p <- length(group) 7 | X <- matrix(rnorm(n * p), ncol = p) 8 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 9 | b.mle <- coef(coxph(y ~ X)) 10 | b <- coef(fit <- grpsurv(X, y, group, penalty = "grLasso", lambda = 0, eps = 1e-10)) 11 | expect_equivalent(b, b.mle, tol = 0.0001) 12 | 13 | # constant columns 14 | n <- 50 15 | group <- rep(0:3, 4:1) 16 | p <- length(group) 17 | X <- matrix(rnorm(n * p), ncol = p) 18 | X[, 5] <- 0 19 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 20 | b.mle <- coef(coxph(y ~ X)) 21 | b.mle[is.na(b.mle)] <- 0 22 | b <- coef(fit <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 23 | expect_equivalent(b, b.mle, tol = 0.0001) 24 | b <- coef(fit <- grpsurv(X, y, group, penalty = "gel", lambda.min = 0, eps = 1e-7), 0) 25 | expect_equivalent(b, b.mle, tol = 0.0001) 26 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 27 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 28 | 29 | # constant groups 30 | n <- 50 31 | group <- rep(0:3, 4:1) 32 | p <- length(group) 33 | X <- matrix(rnorm(n * p), ncol = p) 34 | X[, group == 2] <- 0 35 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 36 | b.mle <- coef(coxph(y ~ X)) 37 | b.mle[is.na(b.mle)] <- 0 38 | b <- coef(fit <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 39 | expect_equivalent(b, b.mle, tol = 0.0001) 40 | b <- coef(fit <- grpsurv(X, y, group, penalty = "gel", lambda.min = 0, eps = 1e-7), 0) 41 | expect_equivalent(b, b.mle, tol = 0.0001) 42 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 43 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 44 | 45 | # constant groups w/ multiplier specified 46 | n <- 50 47 | group <- rep(0:3, 4:1) 48 | p <- length(group) 49 | X <- matrix(rnorm(n * p), ncol = p) 50 | X[, group == 2] <- 0 51 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 52 | b.mle <- coef(coxph(y ~ X)) 53 | b.mle[is.na(b.mle)] <- 0 54 | b <- coef( 55 | fit <- grpsurv( 56 | X, 57 | y, 58 | group, 59 | penalty = "grLasso", 60 | lambda.min = 
0, 61 | eps = 1e-7, 62 | group.multiplier = 1:3 63 | ), 64 | 0 65 | ) 66 | expect_equivalent(b, b.mle, tol = 0.0001) 67 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 68 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 69 | 70 | # grpsurv handles groups of non-full rank 71 | n <- 50 72 | group <- rep(0:3, 4:1) 73 | p <- length(group) 74 | X <- matrix(rnorm(n * p), ncol = p) 75 | X[, 7] <- X[, 6] 76 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 77 | b0 <- coef(coxph(y ~ X)) 78 | b0[6:7] <- b0[6] / 2 79 | b <- coef(fit <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 80 | expect_equivalent(b, b0, tol = 0.0001) 81 | cvfit <- cv.grpsurv(X, y, group, penalty = "grLasso") 82 | cvfit <- cv.grpsurv(X, y, group, penalty = "gel") 83 | 84 | # out-of-order groups #1 85 | n <- 50 86 | group <- rep(1:2, each = 2) 87 | perm <- sample(1:length(group)) 88 | p <- length(group) 89 | X <- matrix(rnorm(n * p), ncol = p) 90 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 91 | fit1 <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7) 92 | fit2 <- grpsurv(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0, eps = 1e-7) 93 | b1 <- coef(fit1, 0)[perm] 94 | b2 <- coef(fit2, 0) 95 | expect_equivalent(b1, b2, tol = 0.01) 96 | cvfit <- cv.grpsurv(X[, perm], y, group[perm], penalty = "grLasso") 97 | 98 | # out-of-order groups #2 99 | n <- 50 100 | group <- rep(0:3, 4:1) 101 | perm <- sample(1:length(group)) 102 | p <- length(group) 103 | X <- matrix(rnorm(n * p), ncol = p) 104 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 105 | fit1 <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7) 106 | fit2 <- grpsurv(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0, eps = 1e-7) 107 | b1 <- coef(fit1, 0)[perm] 108 | b2 <- coef(fit2, 0) 109 | expect_equivalent(b1, b2, tol = 0.01) 110 | cvfit <- cv.grpsurv(X[, perm], y, group[perm], penalty = "grLasso") 111 | 112 | # groups order + rank + constant col + constant grp 113 | n <- 50 114 | group <- rep(0:4, c(2, 2:5)) 115 | perm <- sample(1:length(group)) 116 | p <- length(group) 117 | X <- matrix(rnorm(n * p), ncol = p) 118 | X[, 7] <- X[, 6] # Group 2 not full rank 119 | X[, group == 3] <- 0 # Group 3 constant 120 | X[, 15] <- 0 # Group 4 contains a zero column 121 | y <- Surv(rexp(n), rep(0:1, c(10, n - 10))) 122 | fit1 <- grpsurv(X, y, group, penalty = "grLasso", lambda.min = 0) 123 | fit2 <- grpsurv(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0) 124 | b1 <- coef(fit1, 0) 125 | b2 <- coef(fit2, 0) 126 | expect_equivalent(b1[perm], b2, tol = 0.01) # Checking perm ordering 127 | nz <- which(apply(X, 2, sd) != 0) 128 | fit3 <- grpsurv(X[, nz], y, group[nz], penalty = "grLasso", lambda.min = 0) 129 | b3 <- coef(fit3, 0) 130 | expect_equivalent(b1[nz], b3, tol = 0.01) # Checking dropped group/var 131 | expect_equivalent(coef(fit1)["V6", ], coef(fit1)["V7", ], tol = 0.01) # Checking rank handled correctly 132 | cvfit <- cv.grpsurv(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0) 133 | plot(cvfit) 134 | summary(cvfit) 135 | plot(cvfit$fit) 136 | -------------------------------------------------------------------------------- /inst/tinytest/torture.r: -------------------------------------------------------------------------------- 1 | # Single lambda 2 | n <- 50 3 | group <- rep(0:3, 4:1) 4 | p <- length(group) 5 | X <- matrix(rnorm(n * p), ncol = p) 6 | y <- rnorm(n) 7 | b.lm <- coef(lm(y ~ X)) 8 | b <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", 
lambda = 0, eps = 1e-10)) 9 | expect_equivalent(b, b.lm, tol = 0.0001) 10 | 11 | # constant columns 12 | n <- 50 13 | group <- rep(0:3, 4:1) 14 | p <- length(group) 15 | X <- matrix(rnorm(n * p), ncol = p) 16 | X[, 5] <- 0 17 | y <- rnorm(n) 18 | b.lm <- coef(lm(y ~ X)) 19 | b.lm[is.na(b.lm)] <- 0 20 | b <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 21 | expect_equivalent(b, b.lm, tol = 0.0001) 22 | b <- coef(fit <- grpreg(X, y, group, penalty = "gel", lambda.min = 0, eps = 1e-7), 0) 23 | expect_equivalent(b, b.lm, tol = 0.0001) 24 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 25 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 26 | 27 | # constant groups 28 | n <- 50 29 | group <- rep(0:3, 4:1) 30 | p <- length(group) 31 | X <- matrix(rnorm(n * p), ncol = p) 32 | X[, group == 2] <- 0 33 | y <- rnorm(n) 34 | b.lm <- coef(lm(y ~ X)) 35 | b.lm[is.na(b.lm)] <- 0 36 | b <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 37 | expect_equivalent(b, b.lm, tol = 0.0001) 38 | b <- coef(fit <- grpreg(X, y, group, penalty = "gel", lambda.min = 0, eps = 1e-7), 0) 39 | expect_equivalent(b, b.lm, tol = 0.0001) 40 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 41 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 42 | 43 | # constant groups w/ multiplier specified 44 | n <- 50 45 | group <- rep(0:3, 4:1) 46 | p <- length(group) 47 | X <- matrix(rnorm(n * p), ncol = p) 48 | X[, group == 2] <- 0 49 | y <- rnorm(n) 50 | b.lm <- coef(lm(y ~ X)) 51 | b.lm[is.na(b.lm)] <- 0 52 | b <- coef( 53 | fit <- grpreg( 54 | X, 55 | y, 56 | group, 57 | penalty = "grLasso", 58 | lambda.min = 0, 59 | eps = 1e-7, 60 | group.multiplier = 1:3 61 | ), 62 | 0 63 | ) 64 | expect_equivalent(b, b.lm, tol = 0.0001) 65 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 66 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 67 | 68 | # grpreg handles groups of non-full rank 69 | n <- 50 70 | group <- rep(0:3, 4:1) 71 | p <- length(group) 72 | X <- matrix(rnorm(n * p), ncol = p) 73 | X[, 7] <- X[, 6] 74 | y <- rnorm(n) 75 | b0 <- coef(lm(y ~ X)) 76 | b0[7:8] <- b0[7] / 2 77 | b <- coef(fit <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7), 0) 78 | expect_equivalent(b, b0, tol = 0.0001) 79 | cvfit <- cv.grpreg(X, y, group, penalty = "grLasso") 80 | cvfit <- cv.grpreg(X, y, group, penalty = "gel") 81 | 82 | # out-of-order groups #1 83 | n <- 50 84 | group <- rep(1:2, each = 2) 85 | perm <- sample(1:length(group)) 86 | p <- length(group) 87 | X <- matrix(rnorm(n * p), ncol = p) 88 | y <- rnorm(n) 89 | fit1 <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7) 90 | fit2 <- grpreg(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0, eps = 1e-7) 91 | b1 <- coef(fit1, 0)[-1][perm] 92 | b2 <- coef(fit2, 0)[-1] 93 | expect_equivalent(b1, b2, tol = 0.01) 94 | cvfit <- cv.grpreg(X[, perm], y, group[perm], penalty = "grLasso") 95 | 96 | # out-of-order groups #2 97 | n <- 50 98 | group <- rep(0:3, 4:1) 99 | perm <- sample(1:length(group)) 100 | p <- length(group) 101 | X <- matrix(rnorm(n * p), ncol = p) 102 | y <- rnorm(n) 103 | fit1 <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0, eps = 1e-7) 104 | fit2 <- grpreg(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0, eps = 1e-7) 105 | b1 <- coef(fit1, 0)[-1][perm] 106 | b2 <- coef(fit2, 0)[-1] 107 | expect_equivalent(b1, b2, tol = 0.01) 108 | cvfit <- cv.grpreg(X[, perm], y, group[perm], penalty = "grLasso") 109 | 110 | # 
groups order + rank + constant col + constant grp 111 | n <- 50 112 | group <- rep(0:4, c(2, 2:5)) 113 | perm <- sample(1:length(group)) 114 | p <- length(group) 115 | X <- matrix(rnorm(n * p), ncol = p) 116 | X[, 7] <- X[, 6] # Group 2 not full rank 117 | X[, group == 3] <- 0 # Group 3 constant 118 | X[, 15] <- 0 # Group 4 contains a zero column 119 | y <- rnorm(n) 120 | fit1 <- grpreg(X, y, group, penalty = "grLasso", lambda.min = 0) 121 | fit2 <- grpreg(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0) 122 | b1 <- coef(fit1, 0)[-1] 123 | b2 <- coef(fit2, 0)[-1] 124 | expect_equivalent(b1[perm], b2, tol = 0.01) # Checking perm ordering 125 | nz <- which(apply(X, 2, sd) != 0) 126 | fit3 <- grpreg(X[, nz], y, group[nz], penalty = "grLasso", lambda.min = 0) 127 | b3 <- coef(fit3, 0)[-1] 128 | expect_equivalent(b1[nz], b3, tol = 0.01) # Checking dropped group/var 129 | expect_equivalent(coef(fit1)["V6", ], coef(fit1)["V7", ], tol = 0.01) # Checking rank handled correctly 130 | cvfit <- cv.grpreg(X[, perm], y, group[perm], penalty = "grLasso", lambda.min = 0) 131 | plot(cvfit) 132 | summary(cvfit) 133 | plot(cvfit$fit) 134 | -------------------------------------------------------------------------------- /man/AUC.cv.grpsurv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/auc.R 3 | \name{AUC.cv.grpsurv} 4 | \alias{AUC.cv.grpsurv} 5 | \alias{AUC} 6 | \title{Calculates AUC for cv.grpsurv objects} 7 | \usage{ 8 | \method{AUC}{cv.grpsurv}(obj, ...) 9 | 10 | AUC(obj, ...) 11 | } 12 | \arguments{ 13 | \item{obj}{A \code{cv.grpsurv} object. You must run \code{cv.grpsurv()} with the option \code{returnY=TRUE} in order for \code{AUC} to work.} 14 | 15 | \item{\dots}{For S3 method compatibility.} 16 | } 17 | \description{ 18 | Calculates the cross-validated AUC (concordance) from a "cv.grpsurv" object. 19 | } 20 | \details{ 21 | The area under the curve (AUC), or equivalently, the concordance statistic 22 | (C), is calculated according to the procedure described in van Houwelingen 23 | and Putter (2011). The function calls \code{survival::concordancefit()}, except 24 | cross-validated linear predictors are used to guard against overfitting. 25 | Thus, the values returned by \code{AUC.cv.grpsurv()} will be lower than those you 26 | would obtain with \code{concordancefit()} if you fit the full (unpenalized) model. 27 | } 28 | \examples{ 29 | \dontshow{set.seed(1)} 30 | data(Lung) 31 | X <- Lung$X 32 | y <- Lung$y 33 | group <- Lung$group 34 | 35 | cvfit <- cv.grpsurv(X, y, group, returnY=TRUE) 36 | head(AUC(cvfit)) 37 | ll <- log(cvfit$fit$lambda) 38 | plot(ll, AUC(cvfit), xlim=rev(range(ll)), lwd=3, type='l', 39 | xlab=expression(log(lambda)), ylab='AUC', las=1) 40 | } 41 | \references{ 42 | van Houwelingen H, Putter H (2011). \emph{Dynamic Prediction in Clinical Survival Analysis}. CRC Press. 
43 | } 44 | \seealso{ 45 | \code{\link[=cv.grpsurv]{cv.grpsurv()}}, \code{\link[survival:concordancefit]{survival::concordancefit()}} 46 | } 47 | -------------------------------------------------------------------------------- /man/Birthwt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{Birthwt} 5 | \alias{Birthwt} 6 | \title{Risk Factors Associated with Low Infant Birth Weight} 7 | \format{ 8 | The \code{Birthwt} object is a list containing four elements (\code{X}, \code{bwt}, \code{low}, and \code{group}): 9 | \describe{ 10 | \item{bwt}{Birth weight in kilograms} 11 | \item{low}{Indicator of birth weight less than 2.5kg} 12 | \item{group}{Vector describing how the columns of X are grouped} 13 | \item{X}{A matrix with 189 observations (rows) and 16 predictor variables (columns).} 14 | } 15 | The matrix \code{X} contains the following columns: 16 | \describe{ 17 | \item{age1,age2,age3}{Orthogonal polynomials of first, second, and third degree representing mother's age in years} 18 | \item{lwt1,lwt2,lwt3}{Orthogonal polynomials of first, second, and third degree representing mother's weight in pounds at last menstrual period} 19 | \item{white,black}{Indicator functions for mother's race; "other" is reference group} 20 | \item{smoke}{Smoking status during pregnancy} 21 | \item{ptl1,ptl2m}{Indicator functions for one or for two or more previous premature labors, respectively. No previous premature labors is the reference category.} 22 | \item{ht}{History of hypertension} 23 | \item{ui}{Presence of uterine irritability} 24 | \item{ftv1,ftv2,ftv3m}{Indicator functions for one, for two, or for three or more physician visits during the first trimester, respectively. No visits is the reference category.} 25 | } 26 | } 27 | \source{ 28 | \url{https://cran.r-project.org/package=MASS} 29 | } 30 | \usage{ 31 | Birthwt 32 | } 33 | \description{ 34 | The \code{Birthwt} data contains 189 observations, 16 predictors, and an 35 | outcome, birthweight, available both as a continuous measure and a binary 36 | indicator for low birth weight. The data were collected at Baystate Medical 37 | Center, Springfield, Mass. during 1986. This data frame is a 38 | reparameterization of the \code{birthwt} data frame from the \strong{MASS} package. 39 | } 40 | \examples{ 41 | data(Birthwt) 42 | hist(Birthwt$bwt, xlab="Child's birth weight", main="") 43 | table(Birthwt$low) 44 | ## See examples in ?birthwt (MASS package) 45 | ## for more about the data set 46 | ## See examples in ?grpreg for use of this data set 47 | ## with group penalized regression models 48 | } 49 | \references{ 50 | \itemize{ 51 | \item Venables, W. N. and Ripley, B. D. (2002). \emph{Modern Applied Statistics with S.} Fourth edition. Springer. 52 | \item Hosmer, D.W. and Lemeshow, S.
(1989) \emph{Applied Logistic Regression.} New York: Wiley 53 | } 54 | } 55 | \seealso{ 56 | \link[MASS:birthwt]{MASS::birthwt}, \code{\link[=grpreg]{grpreg()}} 57 | } 58 | \keyword{datasets} 59 | -------------------------------------------------------------------------------- /man/Lung.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{Lung} 5 | \alias{Lung} 6 | \title{VA lung cancer data set} 7 | \format{ 8 | A list of two objects: \code{y} and \code{X} 9 | \describe{ 10 | \item{y}{A two column matrix (\code{Surv} object) containing the follow-up 11 | time (in days) and an indicator variable for whether the patient died 12 | while on the study or not.} 13 | \item{X}{A matrix with 137 observations (rows) and 9 predictor variables 14 | (columns). The remainder of this list describes the columns of \code{X}} 15 | \item{trt}{Treatment indicator (1=control group, 2=treatment group)} 16 | \item{karno}{Karnofsky performance score (0=bad, 100=good)} 17 | \item{diagtime}{Time from diagnosis to randomization (months)} 18 | \item{age}{Age (years, at baseline)} 19 | \item{prior}{Prior therapy (0=no, 1=yes)} 20 | \item{squamous}{Indicator for whether the cancer type is squamous cell 21 | carcinoma (0=no, 1=yes)} 22 | \item{small}{Indicator for whether the cancer type is small cell lung 23 | cancer (0=no, 1=yes)} 24 | \item{adeno}{Indicator for whether the cancer type is adenocarcinoma 25 | (0=no, 1=yes)} 26 | \item{large}{Indicator for whether the cancer type is large cell carcinoma 27 | (0=no, 1=yes)} 28 | } 29 | } 30 | \source{ 31 | \url{https://cran.r-project.org/package=survival} 32 | } 33 | \usage{ 34 | Lung 35 | } 36 | \description{ 37 | Data from a randomised trial of two treatment regimens for lung cancer. This 38 | is a standard survival analysis data set from the classic textbook by 39 | Kalbfleisch and Prentice. 40 | } 41 | \references{ 42 | \itemize{ 43 | \item Kalbfleisch D and Prentice RL (1980), \emph{The Statistical Analysis of 44 | Failure Time Data}. Wiley, New York. 45 | } 46 | } 47 | \seealso{ 48 | \code{grpsurv()} 49 | } 50 | \keyword{datasets} 51 | -------------------------------------------------------------------------------- /man/birthwt.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{birthwt.grpreg} 5 | \alias{birthwt.grpreg} 6 | \title{Risk Factors Associated with Low Infant Birth Weight} 7 | \format{ 8 | This data frame contains the following columns: 9 | \itemize{ 10 | \item\code{low} Indicator of birth weight less than 2.5kg \item\code{bwt} 11 | Birth weight in kilograms \item\code{age1,age2,age3} Orthogonal polynomials 12 | of first, second, and third degree representing mother's age in years 13 | \item\code{lwt1,lwt2,lwt3} Orthogonal polynomials of first, second, and 14 | third degree representing mother's weight in pounds at last menstrual period 15 | \item\code{white,black} Indicator functions for mother's race; "other" is 16 | reference group \item\code{smoke} smoking status during pregnancy 17 | \item\code{ptl1,ptl2m} Indicator functions for one or for two or more 18 | previous premature labors, respectively. No previous premature labors is 19 | the reference category. 
\item\code{ht} History of hypertension 20 | \item\code{ui} Presence of uterine irritability \item\code{ftv1,ftv2,ftv3m} 21 | Indicator functions for one, for two, or for three or more physician visits 22 | during the first trimester, respectively. No visits is the reference 23 | category. 24 | } 25 | } 26 | \usage{ 27 | birthwt.grpreg 28 | } 29 | \description{ 30 | This version of the data set has been deprecated and will not be supported 31 | in future versions. Please use \code{\link{Birthwt}} instead. 32 | } 33 | \seealso{ 34 | \code{\link{Birthwt}} 35 | } 36 | \keyword{datasets} 37 | -------------------------------------------------------------------------------- /man/cv.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cv-grpreg.R, R/cv-grpsurv.R 3 | \name{cv.grpreg} 4 | \alias{cv.grpreg} 5 | \alias{cv.grpsurv} 6 | \title{Cross-validation for grpreg/grpsurv} 7 | \usage{ 8 | cv.grpreg( 9 | X, 10 | y, 11 | group = 1:ncol(X), 12 | ..., 13 | nfolds = 10, 14 | seed, 15 | fold, 16 | returnY = FALSE, 17 | trace = FALSE 18 | ) 19 | 20 | cv.grpsurv( 21 | X, 22 | y, 23 | group = 1:ncol(X), 24 | ..., 25 | nfolds = 10, 26 | seed, 27 | fold, 28 | se = c("quick", "bootstrap"), 29 | returnY = FALSE, 30 | trace = FALSE 31 | ) 32 | } 33 | \arguments{ 34 | \item{X}{The design matrix, as in \code{\link[=grpreg]{grpreg()}}/\code{\link[=grpsurv]{grpsurv()}}.} 35 | 36 | \item{y}{The response vector (or matrix), as in \code{\link[=grpreg]{grpreg()}}/\code{\link[=grpsurv]{grpsurv()}}.} 37 | 38 | \item{group}{The grouping vector, as in \code{\link[=grpreg]{grpreg()}}/\code{\link[=grpsurv]{grpsurv()}}.} 39 | 40 | \item{...}{Additional arguments to \code{\link[=grpreg]{grpreg()}}/\code{\link[=grpsurv]{grpsurv()}}.} 41 | 42 | \item{nfolds}{The number of cross-validation folds. Default is 10.} 43 | 44 | \item{seed}{You may set the seed of the random number generator in order to 45 | obtain reproducible results.} 46 | 47 | \item{fold}{Which fold each observation belongs to. By default the 48 | observations are randomly assigned.} 49 | 50 | \item{returnY}{Should cv.grpreg()/cv.grpsurv() return the fitted 51 | values from the cross-validation folds? Default is FALSE; if TRUE, this 52 | will return a matrix in which the element for row i, column j is the fitted 53 | value for observation i from the fold in which observation i was excluded 54 | from the fit, at the jth value of lambda. NOTE: For \code{cv.grpsurv()}, the 55 | rows of \code{Y} are ordered by time on study, and therefore will not 56 | correspond to the original order of observations passed to \code{cv.grpsurv}.} 57 | 58 | \item{trace}{If set to TRUE, cv.grpreg will inform the user of its progress 59 | by announcing the beginning of each CV fold. Default is FALSE.} 60 | 61 | \item{se}{For \code{cv.grpsurv()}, the method by which the cross-validation 62 | standard error (CVSE) is calculated. The 'quick' approach is based on a 63 | rough approximation, but can be calculated more or less instantly.
The 64 | 'bootstrap' approach is more accurate, but requires additional computing 65 | time.} 66 | } 67 | \value{ 68 | An object with S3 class \code{"cv.grpreg"} containing: 69 | \item{cve}{The error for each value of \code{lambda}, averaged across the 70 | cross-validation folds.} \item{cvse}{The estimated standard error associated 71 | with each value of \code{cve}.} \item{lambda}{The sequence of 72 | regularization parameter values along which the cross-validation error was 73 | calculated.} \item{fit}{The fitted \code{grpreg} object for the whole data.} 74 | \item{fold}{The fold assignments for cross-validation for each observation; 75 | note that for \code{cv.grpsurv}, these are in terms of the ordered 76 | observations, not the original observations.} \item{min}{The index of 77 | \code{lambda} corresponding to \code{lambda.min}.} \item{lambda.min}{The 78 | value of \code{lambda} with the minimum cross-validation error.} 79 | \item{null.dev}{The deviance for the intercept-only model.} 80 | \item{pe}{If \code{family="binomial"}, the cross-validation prediction error for 81 | each value of \code{lambda}.} 82 | } 83 | \description{ 84 | Performs k-fold cross validation for penalized regression models with 85 | grouped covariates over a grid of values for the regularization parameter 86 | lambda. 87 | } 88 | \details{ 89 | The function calls \code{\link[=grpreg]{grpreg()}} or \code{\link[=grpsurv]{grpsurv()}} \code{nfolds} times, each 90 | time leaving out 1/\code{nfolds} of the data. The cross-validation error is 91 | based on the deviance; 92 | \href{https://pbreheny.github.io/grpreg/articles/models.html}{see here for more details}. 93 | 94 | For Gaussian and Poisson responses, the folds are chosen according to simple 95 | random sampling. For binomial responses, the numbers for each outcome class 96 | are balanced across the folds; i.e., the number of outcomes in which 97 | \code{y} is equal to 1 is the same for each fold, or possibly off by 1 if 98 | the numbers do not divide evenly. This approach is used for Cox regression 99 | as well to balance the amount of censoring across each fold. 100 | 101 | For Cox models, \code{cv.grpsurv} uses the approach of calculating the full 102 | Cox partial likelihood using the cross-validated set of linear predictors. 103 | Other approaches to cross-validation for the Cox regression model have been 104 | proposed in the literature; the strengths and weaknesses of the various 105 | methods for penalized regression in the Cox model are the subject of current 106 | research. A simple approximation to the standard error is provided, 107 | although an option to bootstrap the standard error (\code{se='bootstrap'}) 108 | is also available. 109 | 110 | As in \code{\link[=grpreg]{grpreg()}}, seemingly unrelated regressions/multitask learning can 111 | be carried out by setting \code{y} to be a matrix, in which case groups are 112 | set up automatically (see \code{\link[=grpreg]{grpreg()}} for details), and 113 | cross-validation is carried out with respect to rows of \code{y}. As 114 | mentioned in the details there, it is recommended to standardize the 115 | responses prior to fitting.
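As a rough sketch of the balanced fold assignment described above (an illustration of the idea only, not the package's internal code), folds for a binary response could be built as follows and supplied via the \code{fold} argument:
\preformatted{
# Illustrative only: balance the two outcome classes across 10 folds
nfolds <- 10
fold <- integer(length(y))
fold[y == 1] <- sample(rep(1:nfolds, length.out = sum(y == 1)))
fold[y == 0] <- sample(rep(1:nfolds, length.out = sum(y == 0)))
cvfit <- cv.grpreg(X, y, group, fold = fold)
}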
116 | } 117 | \examples{ 118 | \dontshow{set.seed(1)} 119 | data(Birthwt) 120 | X <- Birthwt$X 121 | y <- Birthwt$bwt 122 | group <- Birthwt$group 123 | 124 | cvfit <- cv.grpreg(X, y, group) 125 | plot(cvfit) 126 | summary(cvfit) 127 | coef(cvfit) ## Beta at minimum CVE 128 | 129 | cvfit <- cv.grpreg(X, y, group, penalty="gel") 130 | plot(cvfit) 131 | summary(cvfit) 132 | 133 | } 134 | \seealso{ 135 | \code{\link[=grpreg]{grpreg()}}, \code{\link[=plot.cv.grpreg]{plot.cv.grpreg()}}, \code{\link[=summary.cv.grpreg]{summary.cv.grpreg()}}, 136 | \code{\link[=predict.cv.grpreg]{predict.cv.grpreg()}} 137 | } 138 | \author{ 139 | Patrick Breheny 140 | } 141 | -------------------------------------------------------------------------------- /man/expand_spline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expand_spline.R 3 | \name{expand_spline} 4 | \alias{expand_spline} 5 | \title{Expand feature matrix using basis splines} 6 | \usage{ 7 | expand_spline(x, df = 3, degree = 3, type = c("ns", "bs")) 8 | } 9 | \arguments{ 10 | \item{x}{Features to be expanded (numeric matrix).} 11 | 12 | \item{df}{Degrees of freedom (numeric; default = 3).} 13 | 14 | \item{degree}{Degree of the piecewise polynomial (integer; default = 3 (cubic splines)).} 15 | 16 | \item{type}{Type of spline, either B-spline (\code{"bs"}) or natural cubic spline (\code{"ns"}; default).} 17 | } 18 | \value{ 19 | An object of class \code{expandedMatrix} consisting of: 20 | \itemize{ 21 | \item \code{X}: A matrix of dimension \code{nrow(x)} by \code{df*ncol(x)} 22 | \item \code{group}: A vector of length \code{df*ncol(x)} that describes the grouping structure 23 | \item Additional metadata on the splines, such as knot locations, required in order to evaluate spline at new feature values (e.g., for prediction) 24 | } 25 | } 26 | \description{ 27 | Performs a basis expansion for many features at once, returning output that is compatible 28 | for use with the \code{grpreg()} function. Returns an expanded matrix along with a vector 29 | that describes its grouping. 30 | } 31 | \details{ 32 | \code{expand_spline()} uses the function \code{\link[splines:bs]{splines::bs()}} or \code{\link[splines:ns]{splines::ns()}} to generate a basis 33 | matrix for each column of \code{x}. These matrices represent the spline basis for piecewise 34 | polynomials with specified degree evaluated separately for each original column of \code{x}. 35 | These matrices are then column-bound to form a single grouped matrix of derived features. A vector 36 | that describes the grouping present in the resulting matrix is also generated. The resulting 37 | object can be passed to \code{\link[=grpreg]{grpreg()}}. 38 | 39 | This methodology was originally proposed by Ravikumar et al. (2009), who named it SPAM (SParse Additive Modeling). 40 | } 41 | \examples{ 42 | \dontshow{set.seed(1)} 43 | Data <- gen_nonlinear_data(n=1000) 44 | X <- expand_spline(Data$X) 45 | fit <- grpreg(X, Data$y) 46 | plot_spline(fit, "V02", lambda = 0.03) 47 | } 48 | \references{ 49 | \itemize{ 50 | \item Ravikumar P, Lafferty J, Liu H and Wasserman L (2009). Sparse additive models. \emph{Journal of the Royal Statistical Society Series B}, \strong{71}: 1009-1030. 
51 | } 52 | } 53 | \seealso{ 54 | \code{\link[=plot_spline]{plot_spline()}} to visualize the resulting nonlinear fits 55 | } 56 | -------------------------------------------------------------------------------- /man/gBridge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gBridge.R 3 | \name{gBridge} 4 | \alias{gBridge} 5 | \title{Fit a group bridge regression path} 6 | \usage{ 7 | gBridge( 8 | X, 9 | y, 10 | group = 1:ncol(X), 11 | family = c("gaussian", "binomial", "poisson"), 12 | nlambda = 100, 13 | lambda, 14 | lambda.min = { 15 | if (nrow(X) > ncol(X)) 16 | 0.001 17 | else 0.05 18 | }, 19 | lambda.max, 20 | alpha = 1, 21 | eps = 0.001, 22 | delta = 1e-07, 23 | max.iter = 10000, 24 | gamma = 0.5, 25 | group.multiplier, 26 | warn = TRUE, 27 | returnX = FALSE, 28 | ... 29 | ) 30 | } 31 | \arguments{ 32 | \item{X}{The design matrix, as in \code{grpreg}.} 33 | 34 | \item{y}{The response vector (or matrix), as in \code{grpreg}.} 35 | 36 | \item{group}{The grouping vector, as in \code{grpreg}.} 37 | 38 | \item{family}{One of "gaussian", "binomial", or "poisson", depending on the response.} 39 | 40 | \item{nlambda}{The number of \code{lambda} values, as in \code{grpreg}.} 41 | 42 | \item{lambda}{A user supplied sequence of \code{lambda} values, as in \code{grpreg()}.} 43 | 44 | \item{lambda.min}{The smallest value for \code{lambda}, as in \code{grpreg}.} 45 | 46 | \item{lambda.max}{The maximum value for \code{lambda}. Unlike the penalties 47 | in \code{grpreg}, it is not possible to solve for \code{lambda.max} directly 48 | with group bridge models. Thus, it must be specified by the user. If it is 49 | not specified, \code{gBridge} will attempt to guess \code{lambda.max}, but 50 | this is not particularly accurate.} 51 | 52 | \item{alpha}{Tuning parameter for the balance between the group penalty and 53 | the L2 penalty, as in \code{grpreg}.} 54 | 55 | \item{eps}{Convergence threshold, as in \code{grpreg}.} 56 | 57 | \item{delta}{The group bridge penalty is not differentiable at zero, and 58 | requires a small number \code{delta} to bound it away from zero. There is 59 | typically no need to change this value.} 60 | 61 | \item{max.iter}{Maximum number of iterations, as in \code{grpreg}.} 62 | 63 | \item{gamma}{Tuning parameter of the group bridge penalty (the exponent to 64 | which the L1 norm of the coefficients in the group is raised). Default is 65 | 0.5, the square root.} 66 | 67 | \item{group.multiplier}{The multiplicative factor by which each group's 68 | penalty is to be multiplied, as in \code{grpreg}.} 69 | 70 | \item{warn}{Should the function give a warning if it fails to converge? As 71 | in \code{grpreg}.} 72 | 73 | \item{returnX}{Return the standardized design matrix (and associated group 74 | structure information)? Default is FALSE.} 75 | 76 | \item{...}{Not used.} 77 | } 78 | \value{ 79 | An object with S3 class \code{"grpreg"}, as in \code{grpreg}. 80 | } 81 | \description{ 82 | Fit regularization paths for linear and logistic group bridge-penalized 83 | regression models over a grid of values for the regularization parameter 84 | lambda. 85 | } 86 | \details{ 87 | This method fits the group bridge method of Huang et al. (2009). Unlike the 88 | penalties in \code{grpreg}, the group bridge is not differentiable at zero; 89 | because of this, a number of changes must be made to the algorithm, which is 90 | why it has its own function.
Most notably, the method is unable to start at 91 | \code{lambda.max}; it must start at \code{lambda.min} and proceed in the 92 | opposite direction. 93 | 94 | In other respects, the usage and behavior of the function are similar to the 95 | rest of the \code{grpreg} package. 96 | } 97 | \examples{ 98 | data(Birthwt) 99 | X <- Birthwt$X 100 | group <- Birthwt$group 101 | 102 | ## Linear regression 103 | y <- Birthwt$bwt 104 | fit <- gBridge(X, y, group, lambda.max=0.08) 105 | plot(fit) 106 | select(fit)$beta 107 | 108 | ## Logistic regression 109 | y <- Birthwt$low 110 | fit <- gBridge(X, y, group, family="binomial", lambda.max=0.17) 111 | plot(fit) 112 | select(fit)$beta 113 | } 114 | \references{ 115 | \itemize{ 116 | \item Huang J, Ma S, Xie H, and Zhang C. (2009) A group bridge approach for 117 | variable selection. \emph{Biometrika}, \strong{96}: 339-355. \doi{10.1093/biomet/asp020} 118 | 119 | \item Breheny P and Huang J. (2009) Penalized methods for bi-level variable 120 | selection. \emph{Statistics and its interface}, \strong{2}: 369-380. 121 | \doi{10.4310/sii.2009.v2.n3.a10} 122 | } 123 | } 124 | \seealso{ 125 | \code{\link[=grpreg]{grpreg()}} 126 | } 127 | -------------------------------------------------------------------------------- /man/gen_nonlinear_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gen_nonlinear_data.R 3 | \name{gen_nonlinear_data} 4 | \alias{gen_nonlinear_data} 5 | \title{Generate nonlinear example data} 6 | \usage{ 7 | gen_nonlinear_data(n = 100, p = 16, seed) 8 | } 9 | \arguments{ 10 | \item{n}{Sample size (numeric; default = 100).} 11 | 12 | \item{p}{Number of features (numeric; default = 16).} 13 | 14 | \item{seed}{Set to get different random data sets, passed to \code{\link[=set.seed]{set.seed()}}.} 15 | } 16 | \description{ 17 | Mainly intended to demonstrate the use of basis expansion models for sparse additive modeling; designed for use with \code{\link[=expand_spline]{expand_spline()}}. 18 | } 19 | \examples{ 20 | Data <- gen_nonlinear_data() 21 | } 22 | -------------------------------------------------------------------------------- /man/grpreg-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/grpreg-package.R 3 | \docType{package} 4 | \name{grpreg-package} 5 | \alias{grpreg-package} 6 | \title{grpreg: Regularization Paths for Regression Models with Grouped Covariates} 7 | \description{ 8 | Efficient algorithms for fitting the regularization path of linear regression, GLM, and Cox regression models with grouped penalties. This includes group selection methods such as group lasso, group MCP, and group SCAD as well as bi-level selection methods such as the group exponential lasso, the composite MCP, and the group bridge. For more information, see Breheny and Huang (2009) \doi{10.4310/sii.2009.v2.n3.a10}, Huang, Breheny, and Ma (2012) \doi{10.1214/12-sts392}, Breheny and Huang (2015) \doi{10.1007/s11222-013-9424-2}, and Breheny (2015) \doi{10.1111/biom.12300}, or visit the package homepage \url{https://pbreheny.github.io/grpreg/}. 9 | } 10 | \examples{ 11 | \donttest{vignette("getting-started", package="grpreg")} 12 | } 13 | \references{ 14 | \itemize{ 15 | \item Yuan M and Lin Y. (2006) Model selection and estimation in regression 16 | with grouped variables.
\emph{Journal of the Royal Statistical Society Series B}, 17 | \strong{68}: 49-67. \doi{10.1111/j.1467-9868.2005.00532.x} 18 | 19 | \item Huang J, Ma S, Xie H, and Zhang C. (2009) A group bridge approach for 20 | variable selection. \emph{Biometrika}, \strong{96}: 339-355. \doi{10.1093/biomet/asp020} 21 | 22 | \item Breheny P and Huang J. (2009) Penalized methods for bi-level variable 23 | selection. \emph{Statistics and its interface}, \strong{2}: 369-380. 24 | \doi{10.4310/sii.2009.v2.n3.a10} 25 | 26 | \item Huang J, Breheny P, and Ma S. (2012). A selective review of group 27 | selection in high dimensional models. \emph{Statistical Science}, \strong{27}: 481-499. 28 | \doi{10.1214/12-sts392} 29 | 30 | \item Breheny P and Huang J. (2015) Group descent algorithms for nonconvex 31 | penalized linear and logistic regression models with grouped predictors. 32 | \emph{Statistics and Computing}, \strong{25}: 173-187. \doi{10.1007/s11222-013-9424-2} 33 | 34 | \item Breheny P. (2015) The group exponential lasso for bi-level variable 35 | selection. \emph{Biometrics}, \strong{71}: 731-740. \doi{10.1111/biom.12300} 36 | } 37 | } 38 | \seealso{ 39 | Useful links: 40 | \itemize{ 41 | \item \url{https://pbreheny.github.io/grpreg/} 42 | \item \url{https://github.com/pbreheny/grpreg} 43 | \item Report bugs at \url{https://github.com/pbreheny/grpreg/issues} 44 | } 45 | 46 | } 47 | \author{ 48 | Patrick Breheny 49 | } 50 | \keyword{internal} 51 | -------------------------------------------------------------------------------- /man/logLik.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/logLik.R 3 | \name{logLik.grpreg} 4 | \alias{logLik.grpreg} 5 | \alias{logLik} 6 | \alias{logLik.grpsurv} 7 | \title{logLik method for grpreg} 8 | \usage{ 9 | \method{logLik}{grpreg}(object, df.method = c("default", "active"), REML = FALSE, ...) 10 | 11 | \method{logLik}{grpsurv}(object, df.method = c("default", "active"), ...) 12 | } 13 | \arguments{ 14 | \item{object}{A fitted \code{grpreg} or \code{grpsurv} object, as obtained from 15 | \code{\link[=grpreg]{grpreg()}} or \code{\link[=grpsurv]{grpsurv()}}} 16 | 17 | \item{df.method}{How should effective model parameters be calculated? One 18 | of: \code{"active"}, which counts the number of nonzero coefficients; or 19 | \code{"default"}, which uses the calculated \code{df} returned by 20 | \code{grpreg}. Default is \code{"default"}.} 21 | 22 | \item{REML}{Use restricted MLE for estimation of the scale parameter in a 23 | gaussian model? Default is FALSE.} 24 | 25 | \item{...}{For S3 method compatibility.} 26 | } 27 | \value{ 28 | Returns an object of class 'logLik', in this case consisting of a 29 | number (or vector of numbers) with two attributes: 'df' (the estimated 30 | degrees of freedom in the model) and 'nobs' (number of observations). 31 | 32 | The 'print' method for 'logLik' objects is not intended to handle vectors; 33 | consequently, the value of the function does not necessarily display 34 | correctly. However, it works with 'AIC' and 'BIC' without any glitches and 35 | returns the expected vectorized output. 36 | } 37 | \description{ 38 | Calculates the log likelihood and degrees of freedom for a fitted grpreg 39 | object. 40 | } 41 | \details{ 42 | Exists mainly for use with \code{\link[stats:AIC]{stats::AIC()}} and \code{\link[stats:AIC]{stats::BIC()}}. 
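For instance, the values that \code{AIC()} and \code{BIC()} compute from this method can be reproduced by hand using the \code{df} and \code{nobs} attributes described above (a minimal sketch):
\preformatted{
ll <- logLik(fit)  # one log-likelihood per value of lambda
-2 * as.numeric(ll) + 2 * attr(ll, "df")                      # AIC(fit)
-2 * as.numeric(ll) + log(attr(ll, "nobs")) * attr(ll, "df")  # BIC(fit)
}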
43 | } 44 | \examples{ 45 | data(Birthwt) 46 | X <- Birthwt$X 47 | y <- Birthwt$bwt 48 | group <- Birthwt$group 49 | fit <- grpreg(X,y,group,penalty="cMCP") 50 | logLik(fit) ## Display is glitchy for vectors 51 | AIC(fit) 52 | BIC(fit) 53 | } 54 | \seealso{ 55 | \code{\link[=grpreg]{grpreg()}} 56 | } 57 | \author{ 58 | Patrick Breheny 59 | } 60 | -------------------------------------------------------------------------------- /man/plot.cv.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot-cv.R 3 | \name{plot.cv.grpreg} 4 | \alias{plot.cv.grpreg} 5 | \title{Plots the cross-validation curve from a \code{cv.grpreg} object} 6 | \usage{ 7 | \method{plot}{cv.grpreg}( 8 | x, 9 | log.l = TRUE, 10 | type = c("cve", "rsq", "scale", "snr", "pred", "all"), 11 | selected = TRUE, 12 | vertical.line = TRUE, 13 | col = "red", 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{A \code{cv.grpreg} object.} 19 | 20 | \item{log.l}{Should horizontal axis be on the log scale? Default is TRUE.} 21 | 22 | \item{type}{What to plot on the vertical axis. \code{cve} plots the 23 | cross-validation error (deviance); \code{rsq} plots an estimate of the 24 | fraction of the deviance explained by the model (R-squared); \code{snr} 25 | plots an estimate of the signal-to-noise ratio; \code{scale} plots, for 26 | \code{family="gaussian"}, an estimate of the scale parameter (standard 27 | deviation); \code{pred} plots, for \code{family="binomial"}, the estimated 28 | prediction error; \code{all} produces all of the above.} 29 | 30 | \item{selected}{If \code{TRUE} (the default), places an axis on top of the 31 | plot denoting the number of groups in the model (i.e., that contain a 32 | nonzero regression coefficient) at that value of \code{lambda}.} 33 | 34 | \item{vertical.line}{If \code{TRUE} (the default), draws a vertical line at 35 | the value where cross-validation error is minimized.} 36 | 37 | \item{col}{Controls the color of the dots (CV estimates).} 38 | 39 | \item{\dots}{Other graphical parameters to \code{plot}} 40 | } 41 | \description{ 42 | Plots the cross-validation curve from a \code{cv.grpreg} object, along with 43 | standard error bars. 44 | } 45 | \details{ 46 | Error bars representing approximate +/- 1 SE (68\\% confidence intervals) are 47 | plotted along with the estimates at each value of \code{lambda}. For \code{rsq} 48 | and \code{snr}, these confidence intervals are quite crude, especially near 49 | zero, and will hopefully be improved upon in later versions of 50 | \code{grpreg}.
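All of the quantities shown are available from the fitted object, so the default \code{type="cve"} display can be approximated by hand (a rough sketch, not the actual plotting code):
\preformatted{
ll <- log(cvfit$lambda)
plot(ll, cvfit$cve, xlim = rev(range(ll)), pch = 19, col = "red",
     xlab = expression(log(lambda)), ylab = "Cross-validation error")
# +/- 1 SE error bars
arrows(ll, cvfit$cve - cvfit$cvse, ll, cvfit$cve + cvfit$cvse,
       angle = 90, code = 3, length = 0.02, col = "gray70")
}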
51 | } 52 | \examples{ 53 | \dontshow{set.seed(1)} 54 | # Birthweight data 55 | data(Birthwt) 56 | X <- Birthwt$X 57 | group <- Birthwt$group 58 | 59 | # Linear regression 60 | y <- Birthwt$bwt 61 | cvfit <- cv.grpreg(X, y, group) 62 | plot(cvfit) 63 | op <- par(mfrow=c(2,2)) 64 | plot(cvfit, type="all") 65 | 66 | ## Logistic regression 67 | y <- Birthwt$low 68 | cvfit <- cv.grpreg(X, y, group, family="binomial") 69 | par(op) 70 | plot(cvfit) 71 | par(mfrow=c(2,2)) 72 | plot(cvfit, type="all") 73 | } 74 | \seealso{ 75 | \code{\link[=grpreg]{grpreg()}}, \code{\link[=cv.grpreg]{cv.grpreg()}} 76 | } 77 | -------------------------------------------------------------------------------- /man/plot.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.R 3 | \name{plot.grpreg} 4 | \alias{plot.grpreg} 5 | \title{Plot coefficients from a "grpreg" object} 6 | \usage{ 7 | \method{plot}{grpreg}(x, alpha = 1, legend.loc, label = FALSE, log.l = FALSE, norm = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{Fitted \code{"grpreg"} model.} 11 | 12 | \item{alpha}{Controls alpha-blending. Default is alpha=1.} 13 | 14 | \item{legend.loc}{Where should the legend go? If left unspecified, no 15 | legend is drawn. See \code{\link[graphics]{legend}} for details.} 16 | 17 | \item{label}{If TRUE, annotates the plot with text labels in the right 18 | margin describing which variable/group the corresponding line belongs to.} 19 | 20 | \item{log.l}{Should horizontal axis be on the log scale? Default is FALSE.} 21 | 22 | \item{norm}{If \code{TRUE}, plot the norm of each group, rather than the 23 | individual coefficients.} 24 | 25 | \item{\dots}{Other graphical parameters to \code{plot}, \code{matlines}, or 26 | \code{legend}} 27 | } 28 | \description{ 29 | Produces a plot of the coefficient paths for a fitted \code{grpreg} object. 30 | } 31 | \examples{ 32 | # Fit model to birthweight data 33 | data(Birthwt) 34 | X <- Birthwt$X 35 | y <- Birthwt$bwt 36 | group <- Birthwt$group 37 | fit <- grpreg(X, y, group, penalty="grLasso") 38 | 39 | # Plot (basic) 40 | plot(fit) 41 | 42 | # Plot group norms, with labels in right margin 43 | plot(fit, norm=TRUE, label=TRUE) 44 | 45 | # Plot (miscellaneous options) 46 | myColors <- c("black", "red", "green", "blue", "yellow", "purple", 47 | "orange", "brown") 48 | plot(fit, legend.loc="topleft", col=myColors) 49 | labs <- c("Mother's Age", "# Phys. visits", "Hypertension", "Mother's weight", 50 | "# Premature", "Race", "Smoking", "Uterine irritability") 51 | plot(fit, legend.loc="topleft", lwd=6, alpha=0.5, legend=labs) 52 | plot(fit, norm=TRUE, legend.loc="topleft", lwd=6, alpha=0.5, legend=labs) 53 | } 54 | \seealso{ 55 | \code{\link[=grpreg]{grpreg()}} 56 | } 57 | -------------------------------------------------------------------------------- /man/plot.grpsurv.func.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot-grpsurv-func.R 3 | \name{plot.grpsurv.func} 4 | \alias{plot.grpsurv.func} 5 | \title{Plot survival curve for grpsurv model} 6 | \usage{ 7 | \method{plot}{grpsurv.func}(x, alpha = 1, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{'grpsurv.func'} object, which is returned by 11 | \code{predict.grpsurv} if \code{type='survival'} is specified. 
See 12 | examples.} 13 | 14 | \item{alpha}{Controls alpha-blending (i.e., transparency). Useful if many 15 | overlapping lines are present.} 16 | 17 | \item{\dots}{Other graphical parameters to pass to \code{plot}} 18 | } 19 | \description{ 20 | Plot survival curve for a model that has been fit using \code{grpsurv} 21 | followed by a prediction of the survival function using 22 | \code{predict.grpsurv} 23 | } 24 | \examples{ 25 | 26 | data(Lung) 27 | X <- Lung$X 28 | y <- Lung$y 29 | group <- Lung$group 30 | fit <- grpsurv(X, y, group) 31 | 32 | # A single survival curve 33 | S <- predict(fit, X[1,], type='survival', lambda=.05) 34 | plot(S, xlim=c(0,200)) 35 | 36 | # Lots of survival curves 37 | S <- predict(fit, X, type='survival', lambda=.05) 38 | plot(S, xlim=c(0,200), alpha=0.3) 39 | } 40 | \seealso{ 41 | \code{\link{grpsurv}}, \code{\link{predict.grpsurv}} 42 | } 43 | \author{ 44 | Patrick Breheny 45 | } 46 | -------------------------------------------------------------------------------- /man/plot_spline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_spline.R 3 | \name{plot_spline} 4 | \alias{plot_spline} 5 | \title{Plot spline curve for a fitted additive model} 6 | \usage{ 7 | plot_spline( 8 | fit, 9 | variable, 10 | lambda, 11 | which = NULL, 12 | partial = FALSE, 13 | type = "contrast", 14 | warnings = TRUE, 15 | points.par = NULL, 16 | add = FALSE, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{fit}{A \code{grpreg} object. The model must have been fit using an \code{expand_spline} object.} 22 | 23 | \item{variable}{The name of the variable which will be plotted (character).} 24 | 25 | \item{lambda}{Values of the regularization parameter \code{lambda} which will be used for the plot. If a vector is passed, a curve will be drawn for each value of lambda (numeric vector; if a \code{cv.grpreg} object is passed, the \code{lambda} value minimizing cross-validation error will be used as a default; otherwise, there is no default value)} 26 | 27 | \item{which}{Index of penalty parameter \code{lambda} which will be used for the plot. If both \code{lambda} and \code{which} are specified, \code{lambda} takes precedence (integer vector).} 28 | 29 | \item{partial}{If \code{TRUE}, a scatter plot of the partial residuals is superimposed on the curve (logical; default = \code{FALSE}). If multiple lambdas are specified, the largest value is used to calculate the residuals.} 30 | 31 | \item{type}{Type of plot to be produced (default = \code{"contrast"}). The following options are supported: 32 | \itemize{ 33 | \item If \code{"conditional"}, the plot returned shows the value of the variable on the x-axis and the change in linear predictor on the y-axis, holding all other variables constant at their mean value. 34 | \item If \code{"contrast"}, the plot returned shows the effect on the linear predictor by moving the x variable away from its mean. 35 | }} 36 | 37 | \item{warnings}{If \code{FALSE}, warnings will be suppressed (default = \code{TRUE}).} 38 | 39 | \item{points.par}{List of parameters (see \code{\link[=par]{par()}}) to pass to \code{\link[=points]{points()}} when \code{partial=TRUE}.} 40 | 41 | \item{add}{Add spline to existing plot? (default: FALSE)} 42 | 43 | \item{...}{Further arguments to be passed to \code{plot()}.
Note that these arguments also control the appearance of the lines.} 44 | } 45 | \description{ 46 | Plots a spline curve for a single variable using a \code{grpreg} or \code{cv.grpreg} object for which an additive model was fit. 47 | } 48 | \details{ 49 | \code{plot_spline()} takes a model fit using both the \code{\link[=grpreg]{grpreg()}} and \code{\link[=expand_spline]{expand_spline()}} functions and plots a spline curve for a given variable. 50 | } 51 | \examples{ 52 | \dontshow{set.seed(1)} 53 | Data <- gen_nonlinear_data(n=1000) 54 | X <- expand_spline(Data$X) 55 | fit <- grpreg(X, Data$y) 56 | plot_spline(fit, "V02", lambda = 0.03) 57 | plot_spline(fit, "V02", which = c(10, 90)) 58 | plot_spline(fit, "V02", lambda = 0.03, partial=TRUE) 59 | plot_spline(fit, "V02", lambda = 0.03, partial=TRUE, type='conditional') 60 | plot_spline(fit, "V02", lambda = 0.03, partial=TRUE, lwd=6, col='yellow', 61 | points.par=list(pch=9, col='blue')) 62 | 63 | op <- par(mfrow=c(3,2), mar=c(4.5, 4.5, 0.25, 0.25)) 64 | for (i in 1:6) plot_spline(fit, sprintf("V\%02d", i), lambda = 0.03, partial=TRUE) 65 | par(op) 66 | 67 | cvfit <- cv.grpreg(X, Data$y) 68 | plot_spline(cvfit, "V02") 69 | plot_spline(cvfit, "V02", partial=TRUE) 70 | } 71 | -------------------------------------------------------------------------------- /man/predict.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict-cv.R, R/predict.R 3 | \name{predict.cv.grpreg} 4 | \alias{predict.cv.grpreg} 5 | \alias{coef.cv.grpreg} 6 | \alias{predict.grpreg} 7 | \alias{coef.grpreg} 8 | \title{Model predictions based on a fitted \code{grpreg} object} 9 | \usage{ 10 | \method{predict}{cv.grpreg}( 11 | object, 12 | X, 13 | lambda = object$lambda.min, 14 | which = object$min, 15 | type = c("link", "response", "class", "coefficients", "vars", "groups", "nvars", 16 | "ngroups", "norm"), 17 | ... 18 | ) 19 | 20 | \method{coef}{cv.grpreg}(object, lambda = object$lambda.min, which = object$min, ...) 21 | 22 | \method{predict}{grpreg}( 23 | object, 24 | X, 25 | type = c("link", "response", "class", "coefficients", "vars", "groups", "nvars", 26 | "ngroups", "norm"), 27 | lambda, 28 | which = 1:length(object$lambda), 29 | ... 30 | ) 31 | 32 | \method{coef}{grpreg}(object, lambda, which = 1:length(object$lambda), drop = TRUE, ...) 33 | } 34 | \arguments{ 35 | \item{object}{Fitted \code{"grpreg"} or \code{"cv.grpreg"} model object.} 36 | 37 | \item{X}{Matrix of values at which predictions are to be made. Not used for 38 | \code{type="coefficients"}} 39 | 40 | \item{lambda}{Values of the regularization parameter \code{lambda} at which 41 | predictions are requested. For values of \code{lambda} not in the sequence 42 | of fitted models, linear interpolation is used.} 43 | 44 | \item{which}{Indices of the penalty parameter \code{lambda} at which 45 | predictions are required. By default, all indices are returned. 
If 46 | \code{lambda} is specified, this will override \code{which}.} 47 | 48 | \item{type}{Type of prediction: \code{"link"} returns the linear predictors; 49 | \code{"response"} gives the fitted values; \code{"class"} returns the 50 | binomial outcome with the highest probability; \code{"coefficients"} returns 51 | the coefficients; \code{"vars"} returns the indices for the nonzero 52 | coefficients; \code{"groups"} returns the indices for the groups with at 53 | least one nonzero coefficient; \code{"nvars"} returns the number of nonzero 54 | coefficients; \code{"ngroups"} returns the number of groups with at least 55 | one nonzero coefficient; \code{"norm"} returns the L2 norm of the 56 | coefficients in each group.} 57 | 58 | \item{\dots}{Not used.} 59 | 60 | \item{drop}{By default, if a single value of \code{lambda} is supplied, a 61 | vector of coefficients is returned. Set \code{drop=FALSE} if you wish to 62 | have \code{coef} always return a matrix (see \code{\link{drop}}).} 63 | } 64 | \value{ 65 | The object returned depends on \code{type}. 66 | } 67 | \description{ 68 | Similar to other predict methods, this function returns predictions from a 69 | fitted \code{"grpreg"} object. 70 | } 71 | \details{ 72 | \code{coef} and \code{predict} methods are provided for \code{"cv.grpreg"} 73 | objects as a convenience. They simply call \code{coef.grpreg} and 74 | \code{predict.grpreg} with \code{lambda} set to the value that minimizes the 75 | cross-validation error. 76 | } 77 | \examples{ 78 | # Fit penalized logistic regression model to birthweight data 79 | data(Birthwt) 80 | X <- Birthwt$X 81 | y <- Birthwt$low 82 | group <- Birthwt$group 83 | fit <- grpreg(X, y, group, penalty="grLasso", family="binomial") 84 | 85 | # Coef and predict methods 86 | coef(fit, lambda=.001) 87 | predict(fit, X, type="link", lambda=.07)[1:10] 88 | predict(fit, X, type="response", lambda=.07)[1:10] 89 | predict(fit, X, type="class", lambda=.01)[1:15] 90 | predict(fit, type="vars", lambda=.07) 91 | predict(fit, type="groups", lambda=.07) 92 | predict(fit, type="norm", lambda=.07) 93 | 94 | # Coef and predict methods for cross-validation 95 | cvfit <- cv.grpreg(X, y, group, family="binomial", penalty="grMCP") 96 | coef(cvfit) 97 | predict(cvfit, X)[1:10] 98 | predict(cvfit, X, type="response")[1:10] 99 | predict(cvfit, type="groups") 100 | } 101 | \seealso{ 102 | \code{grpreg} 103 | } 104 | \author{ 105 | Patrick Breheny 106 | } 107 | -------------------------------------------------------------------------------- /man/predict.grpsurv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict-surv.R 3 | \name{predict.grpsurv} 4 | \alias{predict.grpsurv} 5 | \title{Model predictions for grpsurv objects} 6 | \usage{ 7 | \method{predict}{grpsurv}( 8 | object, 9 | X, 10 | type = c("link", "response", "survival", "hazard", "median", "norm", "coefficients", 11 | "vars", "nvars", "groups", "ngroups"), 12 | lambda, 13 | which = 1:length(object$lambda), 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{Fitted \code{grpsurv} model object.} 19 | 20 | \item{X}{Matrix of values at which predictions are to be made.
Not required for some \code{type} values.} 21 | 22 | \item{type}{Type of prediction: 23 | \itemize{ 24 | \item \code{link}: linear predictors 25 | \item \code{response}: risk (i.e., \code{exp(link)}) 26 | \item \code{survival}: the estimated survival function 27 | \item \code{hazard}: the estimated cumulative hazard function 28 | \item \code{median}: median survival time 29 | \item The other options are all identical to their \code{\link[=grpreg]{grpreg()}} counterparts 30 | }} 31 | 32 | \item{lambda}{Regularization parameter at which predictions are requested. For values of \code{lambda} not in the sequence of fitted models, linear interpolation is used.} 33 | 34 | \item{which}{Indices of the penalty parameter \code{lambda} at which predictions are required. Default: all indices. If \code{lambda} is specified, this will override \code{which}.} 35 | 36 | \item{...}{Not used.} 37 | } 38 | \value{ 39 | The object returned depends on \code{type}. 40 | } 41 | \description{ 42 | Similar to other predict methods, this function returns predictions from a fitted \code{grpsurv} object. 43 | } 44 | \details{ 45 | Estimation of the baseline survival function, conditional on the estimated values of \code{beta}, is carried out according to the method described in Chapter 4.3 of Kalbfleisch and Prentice. 46 | } 47 | \examples{ 48 | data(Lung) 49 | X <- Lung$X 50 | 51 | y <- Lung$y 52 | group <- Lung$group 53 | 54 | fit <- grpsurv(X, y, group) 55 | coef(fit, lambda=0.05) 56 | head(predict(fit, X, type="link", lambda=0.05)) 57 | head(predict(fit, X, type="response", lambda=0.05)) 58 | 59 | # Survival function 60 | S <- predict(fit, X[1,], type="survival", lambda=0.05) 61 | S(100) 62 | S <- predict(fit, X, type="survival", lambda=0.05) 63 | plot(S, xlim=c(0,200)) 64 | 65 | # Medians 66 | predict(fit, X[1,], type="median", lambda=0.05) 67 | M <- predict(fit, X, type="median") 68 | M[1:10, 1:10] 69 | 70 | # Nonzero coefficients 71 | predict(fit, type="vars", lambda=c(0.1, 0.01)) 72 | predict(fit, type="nvars", lambda=c(0.1, 0.01)) 73 | } 74 | \references{ 75 | \itemize{ 76 | \item Kalbfleisch JD and Prentice RL (2002). The Statistical Analysis of Failure Time Data, 2nd edition. Wiley. 77 | } 78 | } 79 | \seealso{ 80 | \code{\link[=grpsurv]{grpsurv()}} 81 | } 82 | \author{ 83 | Patrick Breheny 84 | } 85 | -------------------------------------------------------------------------------- /man/residuals.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/residuals.R 3 | \name{residuals.grpreg} 4 | \alias{residuals.grpreg} 5 | \title{Extract residuals from a grpreg or grpsurv fit} 6 | \usage{ 7 | \method{residuals}{grpreg}(object, lambda, which = 1:length(object$lambda), drop = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Object of class \code{grpreg} or \code{grpsurv}.} 11 | 12 | \item{lambda}{Values of the regularization parameter at which residuals are requested (numeric vector). For values of lambda not in the sequence of fitted models, linear interpolation is used.} 13 | 14 | \item{which}{Index of the penalty parameter at which residuals are requested (default = all indices). If \code{lambda} is specified, this takes precedence over \code{which}.} 15 | 16 | \item{drop}{By default, if a single value of lambda is supplied, a vector of residuals is returned (logical; default=\code{TRUE}).
Set \code{drop=FALSE} if you wish to have the function always return a matrix (see \code{\link[=drop]{drop()}}).} 17 | 18 | \item{...}{Not used.} 19 | } 20 | \description{ 21 | Currently, only deviance residuals are supported. 22 | } 23 | \examples{ 24 | data(Birthwt) 25 | X <- Birthwt$X 26 | y <- Birthwt$bwt 27 | group <- Birthwt$group 28 | fit <- grpreg(X, y, group, returnX=TRUE) 29 | residuals(fit)[1:5, 1:5] 30 | head(residuals(fit, lambda=0.1)) 31 | } 32 | -------------------------------------------------------------------------------- /man/select.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/select.R 3 | \name{select} 4 | \alias{select} 5 | \alias{select.grpreg} 6 | \title{Select a value of lambda along a grpreg path} 7 | \usage{ 8 | select(obj, ...) 9 | 10 | \method{select}{grpreg}( 11 | obj, 12 | criterion = c("BIC", "AIC", "GCV", "AICc", "EBIC"), 13 | df.method = c("default", "active"), 14 | smooth = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{obj}{A fitted grpreg object.} 20 | 21 | \item{\dots}{For S3 method compatibility.} 22 | 23 | \item{criterion}{The criterion by which to select the regularization 24 | parameter. One of \code{"AIC"}, \code{"BIC"}, \code{"GCV"}, \code{"AICc"}, 25 | or \code{"EBIC"}; default is \code{"BIC"}.} 26 | 27 | \item{df.method}{How should effective model parameters be calculated? One 28 | of: \code{"active"}, which counts the number of nonzero coefficients; or 29 | \code{"default"}, which uses the calculated \code{df} returned by 30 | \code{grpreg}. Default is \code{"default"}.} 31 | 32 | \item{smooth}{Applies a smoother to the information criteria before 33 | selecting the optimal value.} 34 | } 35 | \value{ 36 | A list containing: 37 | \describe{ 38 | \item{lambda}{The selected value of the regularization parameter, \code{lambda}.} 39 | \item{beta}{The vector of coefficients at the chosen value of \code{lambda}.} 40 | \item{df}{The effective number of model parameters at the chosen value of \code{lambda}.} 41 | \item{IC}{A vector of the calculated model selection criteria for each point on the regularization path.} 42 | } 43 | } 44 | \description{ 45 | Selects a point along the regularization path of a fitted grpreg object 46 | according to an information criterion: AIC, BIC, GCV, AICc, or EBIC.
47 | } 48 | \details{ 49 | The criteria are defined as follows, where \eqn{L}{L} is the deviance (i.e., 50 | -2 times the log-likelihood), \eqn{\nu}{df} is the degrees of freedom, and 51 | \eqn{n}{n} is the sample size: 52 | 53 | \deqn{AIC = L + 2\nu}{AIC = L + 2*df} \deqn{BIC = L + \log(n)\nu}{BIC = L + 54 | log(n)*df} \deqn{GCV = \frac{L}{(1-\nu/n)^2}}{GCV = L/((1-df/n)^2)} 55 | \deqn{AICc = AIC + 2\frac{\nu(\nu+1)}{n-\nu-1}}{AICc = AIC + 56 | 2*df*(df+1)/(n-df-1)} \deqn{EBIC = BIC + 2 \log{p \choose \nu}}{EBIC = BIC + 57 | 2*log(p choose df)} 58 | } 59 | \examples{ 60 | data(Birthwt) 61 | X <- Birthwt$X 62 | y <- Birthwt$bwt 63 | group <- Birthwt$group 64 | fit <- grpreg(X, y, group, penalty="grLasso") 65 | select(fit) 66 | select(fit, crit="AIC", df="active") 67 | plot(fit) 68 | abline(v=select(fit)$lambda) 69 | par(mfrow=c(1,3)) 70 | l <- fit$lambda 71 | xlim <- rev(range(l)) 72 | plot(l, select(fit)$IC, xlim=xlim, pch=19, type="o", ylab="BIC") 73 | plot(l, select(fit, "AIC")$IC, xlim=xlim, pch=19, type="o", ylab="AIC") 74 | plot(l, select(fit, "GCV")$IC, xlim=xlim, pch=19, type="o", ylab="GCV") 75 | } 76 | \seealso{ 77 | \code{\link[=grpreg]{grpreg()}} 78 | } 79 | -------------------------------------------------------------------------------- /man/summary.cv.grpreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary-cv-grpreg.R 3 | \name{summary.cv.grpreg} 4 | \alias{summary.cv.grpreg} 5 | \alias{print.summary.cv.grpreg} 6 | \title{Summarizing inferences based on cross-validation} 7 | \usage{ 8 | \method{summary}{cv.grpreg}(object, ...) 9 | 10 | \method{print}{summary.cv.grpreg}(x, digits, ...) 11 | } 12 | \arguments{ 13 | \item{object}{A \code{"cv.grpreg"} object.} 14 | 15 | \item{\dots}{Further arguments passed to or from other methods.} 16 | 17 | \item{x}{A \code{"summary.cv.grpreg"} object.} 18 | 19 | \item{digits}{Number of digits past the decimal point to print out. Can be 20 | a vector specifying different display digits for each of the five 21 | non-integer printed values.} 22 | } 23 | \value{ 24 | \code{summary(cvfit)} produces an object with S3 class 25 | \code{"summary.cv.grpreg"}.
The class has its own print method and contains 26 | the following list elements: \item{penalty}{The penalty used by 27 | \code{grpreg}/\code{grpsurv}.} \item{model}{The type of model: 28 | \code{"linear"}, \code{"logistic"}, \code{"Poisson"}, \code{"Cox"}, etc.} 29 | \item{n}{Number of observations} \item{p}{Number of regression coefficients 30 | (not including the intercept).} \item{min}{The index of \code{lambda} with 31 | the smallest cross-validation error.} \item{lambda}{The sequence of 32 | \code{lambda} values used by \code{cv.grpreg}/\code{cv.grpsurv}.} 33 | \item{cve}{Cross-validation error (deviance).} \item{r.squared}{Proportion 34 | of variance explained by the model, as estimated by cross-validation.} 35 | \item{snr}{Signal to noise ratio, as estimated by cross-validation.} 36 | \item{sigma}{For linear regression models, the scale parameter estimate.} 37 | \item{pe}{For logistic regression models, the prediction error 38 | (misclassification error).} 39 | } 40 | \description{ 41 | Summary method for \code{cv.grpreg} or \code{cv.grpsurv} objects 42 | } 43 | \examples{ 44 | 45 | # Birthweight data 46 | data(Birthwt) 47 | X <- Birthwt$X 48 | group <- Birthwt$group 49 | 50 | # Linear regression 51 | y <- Birthwt$bwt 52 | cvfit <- cv.grpreg(X, y, group) 53 | summary(cvfit) 54 | 55 | # Logistic regression 56 | y <- Birthwt$low 57 | cvfit <- cv.grpreg(X, y, group, family="binomial") 58 | summary(cvfit) 59 | 60 | # Cox regression 61 | data(Lung) 62 | cvfit <- with(Lung, cv.grpsurv(X, y, group)) 63 | summary(cvfit) 64 | } 65 | \seealso{ 66 | \code{\link{grpreg}}, \code{\link{cv.grpreg}}, 67 | \code{\link{cv.grpsurv}}, \code{\link{plot.cv.grpreg}} 68 | } 69 | \author{ 70 | Patrick Breheny 71 | } 72 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://pbreheny.github.io/grpreg/ 2 | 3 | template: 4 | bootstrap: 5 5 | light-switch: true 6 | math-rendering: katex 7 | includes: 8 | in_header: | 9 | 10 | 11 | 12 | 13 | figures: 14 | dev: png 15 | dpi: 96 16 | fig.ext: png 17 | fig.width: 5 18 | fig.height: 5 19 | fig.retina: 2 20 | bg: "#fff" 21 | 22 | navbar: 23 | structure: 24 | left: [intro, articles, reference, news] 25 | right: [search, github] 26 | 27 | home: 28 | sidebar: 29 | structure: [links, citation, authors, dev] 30 | 31 | footer: 32 | structure: 33 | left: developed_by 34 | right: last_updated 35 | components: 36 | last_updated: !expr paste("Last updated:", format(Sys.time(), "%B %d, %Y")) 37 | 38 | articles: 39 | - title: "Learn more" 40 | navbar: ~ 41 | contents: 42 | - articles/models 43 | - articles/penalties 44 | - articles/additive-models 45 | - articles/adaptive-rescaling 46 | 47 | reference: 48 | - title: Model fitting 49 | contents: 50 | - grpreg 51 | - grpsurv 52 | - gBridge 53 | - title: Model selection and cross-validation 54 | contents: 55 | - cv.grpreg 56 | - cv.grpsurv 57 | - plot.cv.grpreg 58 | - summary.cv.grpreg 59 | - AUC.cv.grpsurv 60 | - select 61 | - title: Plotting and extracting model features 62 | contents: 63 | - logLik.grpreg 64 | - predict.grpreg 65 | - predict.grpsurv 66 | - plot.grpreg 67 | - plot.grpsurv.func 68 | - residuals.grpreg 69 | - title: Utilities for additive models 70 | contents: 71 | - expand_spline 72 | - gen_nonlinear_data 73 | - plot_spline 74 | - title: Data sets 75 | contents: 76 | - Birthwt 77 | - Lung 78 | - title: internal 79 | contents: 80 | - birthwt.grpreg 81 | - 
grpreg-package 82 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | .dropdown-menu { 2 | --bs-dropdown-link-color: #aaa; 3 | } 4 | 5 | a { 6 | text-decoration:none; 7 | } 8 | 9 | a:hover { 10 | color: #00e7a7; 11 | } 12 | 13 | .navbar { 14 | background: color-mix(in oklab, var(--bs-link-color) 50%, var(--bs-body-bg) 50%); 15 | } 16 | 17 | [data-bs-theme="dark"] pre code { color: #f8f8f2 } 18 | [data-bs-theme="dark"] pre code span.al { color: #ff858f; background-color: #2a0f15; font-weight: bold } /* alert */ 19 | [data-bs-theme="dark"] pre code span.an { color: #f8f8f2} /* annotation */ 20 | [data-bs-theme="dark"] pre code span.at { color: #00e0e0} /* attribute */ 21 | [data-bs-theme="dark"] pre code span.bn { color: #61cc33} /* base n */ 22 | [data-bs-theme="dark"] pre code span.bu { color: #61cc33} /* builtin */ 23 | [data-bs-theme="dark"] pre code span.cf { color: #df9af4; font-weight: bold} /* control flow */ 24 | [data-bs-theme="dark"] pre code span.ch { color: #61cc33} /* character */ 25 | [data-bs-theme="dark"] pre code span.cn { color: #61cc33} /* constant */ 26 | [data-bs-theme="dark"] pre code span.r-in span.co { color: #6abafb; font-style: italic } /* comment */ 27 | [data-bs-theme="dark"] pre code span.r-out.co { color: #ccc; font-style: italic } /* comment */ 28 | [data-bs-theme="dark"] pre code span.cv { color: #61cc33} /* comment variable */ 29 | [data-bs-theme="dark"] pre code span.do { color: #f8f8f2} /* documentation */ 30 | [data-bs-theme="dark"] pre code span.dt { color: #df9af4} /* data type */ 31 | [data-bs-theme="dark"] pre code span.dv { color: #61cc33} /* decimal value */ 32 | [data-bs-theme="dark"] pre code span.er { color: #ff858f; text-decoration: underline} /* error */ 33 | [data-bs-theme="dark"] pre code span.ex { color: #00e0e0; font-weight: bold} /* extension */ 34 | [data-bs-theme="dark"] pre code span.fl { color: #61cc33} /* float */ 35 | [data-bs-theme="dark"] pre code span.fu { color: #ff858f} /* function */ 36 | [data-bs-theme="dark"] pre code span.im { color: #61cc33} /* import */ 37 | [data-bs-theme="dark"] pre code span.in { color: #f8f8f2} /* information */ 38 | [data-bs-theme="dark"] pre code span.kw { color: #df9af4; font-weight: bold} /* keyword */ 39 | [data-bs-theme="dark"] pre code span.op { color: #df9af4} /* operator */ 40 | [data-bs-theme="dark"] pre code span.ot { color: #00e0e0} /* other */ 41 | [data-bs-theme="dark"] pre code span.pp { color: #f8f8f2} /* preprocessor */ 42 | [data-bs-theme="dark"] pre code span.re { color: #00e0e0; background-color: #f8f8f2} /* regular expression? 
*/ 43 | [data-bs-theme="dark"] pre code span.sc { color: #61cc33} /* special character */ 44 | [data-bs-theme="dark"] pre code span.ss { color: #61cc33} /* special string */ 45 | [data-bs-theme="dark"] pre code span.st { color: #61cc33} /* string */ 46 | [data-bs-theme="dark"] pre code span.va { color: #f8f8f2} /* variable */ 47 | [data-bs-theme="dark"] pre code span.vs { color: #61cc33} /* verbatim string */ 48 | [data-bs-theme="dark"] pre code span.wa { color: #dcc6e0} /* warning */ 49 | [data-bs-theme="dark"] pre code .r-pr { color: #777 } /* prompt */ 50 | -------------------------------------------------------------------------------- /src/gdfit_cox.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | double crossprod(double *x, double *y, int n, int j); 8 | double norm(double *x, int p); 9 | double S(double z, double l); 10 | double F(double z, double l1, double l2, double gamma); 11 | double Fs(double z, double l1, double l2, double gamma); 12 | 13 | // Group descent update -- cox 14 | void gd_cox(double *b, double *x, double *r, double *eta, double v, int g, 15 | int *K1, int n, int l, int p, const char *penalty, double lam1, 16 | double lam2, double gamma, SEXP df, double *a, double *maxChange) { 17 | 18 | // Calculate z 19 | int K = K1[g+1] - K1[g]; 20 | double *z = R_Calloc(K, double); 21 | for (int j=K1[g]; j maxChange[0]) maxChange[0] = fabs(shift); 35 | for (int i=0; i 0) REAL(df)[l] += K * len / z_norm; 45 | free(z); 46 | } 47 | 48 | SEXP gdfit_cox(SEXP X_, SEXP d_, SEXP penalty_, SEXP K1_, SEXP K0_, SEXP lambda, SEXP alpha_, SEXP eps_, SEXP max_iter_, SEXP gamma_, SEXP group_multiplier, SEXP dfmax_, SEXP gmax_, SEXP warn_, SEXP user_) { 49 | 50 | // Lengths/dimensions 51 | int n = length(d_); 52 | int L = length(lambda); 53 | int J = length(K1_) - 1; 54 | int p = length(X_)/n; 55 | 56 | // Pointers 57 | double *X = REAL(X_); 58 | double *d = REAL(d_); 59 | const char *penalty = CHAR(STRING_ELT(penalty_, 0)); 60 | int *K1 = INTEGER(K1_); 61 | int K0 = INTEGER(K0_)[0]; 62 | double *lam = REAL(lambda); 63 | double alpha = REAL(alpha_)[0]; 64 | double eps = REAL(eps_)[0]; 65 | int max_iter = INTEGER(max_iter_)[0]; 66 | int tot_iter = 0; 67 | double gamma = REAL(gamma_)[0]; 68 | double *m = REAL(group_multiplier); 69 | int dfmax = INTEGER(dfmax_)[0]; 70 | int gmax = INTEGER(gmax_)[0]; 71 | int warn = INTEGER(warn_)[0]; 72 | int user = INTEGER(user_)[0]; 73 | 74 | // Outcome 75 | SEXP res, beta, Loss, iter, df, Eta; 76 | PROTECT(res = allocVector(VECSXP, 5)); 77 | PROTECT(beta = allocVector(REALSXP, L*p)); 78 | for (int j=0; j<(L*p); j++) REAL(beta)[j] = 0; 79 | PROTECT(iter = allocVector(INTSXP, L)); 80 | for (int i=0; i=0; i--) rsk[i] = rsk[i+1] + 1; 106 | nullDev = 0; 107 | for (int i=0; i gmax || nv > dfmax || tot_iter == max_iter) { 132 | for (int ll=l; ll=0; i--) { 148 | rsk[i] = rsk[i+1] + haz[i]; 149 | } 150 | for (int i=0; i maxChange) maxChange = fabs(shift); 180 | b[l*p+j] = shift + a[j]; 181 | for (int i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | /* .Call calls */ 9 | extern SEXP gdfit_glm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 10 | extern SEXP gdfit_cox(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 11 | extern SEXP gdfit_gaussian(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 
12 | extern SEXP lcdfit_glm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 13 | extern SEXP lcdfit_cox(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 14 | extern SEXP lcdfit_gaussian(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 15 | extern SEXP maxgrad(SEXP, SEXP, SEXP, SEXP); 16 | extern SEXP maxprod(SEXP, SEXP, SEXP, SEXP); 17 | extern SEXP standardize(SEXP); 18 | 19 | static const R_CallMethodDef CallEntries[] = { 20 | {"gdfit_glm", (DL_FUNC) &gdfit_glm, 16}, 21 | {"gdfit_cox", (DL_FUNC) &gdfit_cox, 15}, 22 | {"gdfit_gaussian", (DL_FUNC) &gdfit_gaussian, 15}, 23 | {"lcdfit_glm", (DL_FUNC) &lcdfit_glm, 18}, 24 | {"lcdfit_cox", (DL_FUNC) &lcdfit_cox, 17}, 25 | {"lcdfit_gaussian", (DL_FUNC) &lcdfit_gaussian, 16}, 26 | {"maxgrad", (DL_FUNC) &maxgrad, 4}, 27 | {"maxprod", (DL_FUNC) &maxprod, 4}, 28 | {"standardize", (DL_FUNC) &standardize, 1}, 29 | {NULL, NULL, 0} 30 | }; 31 | 32 | void R_init_grpreg(DllInfo *dll) { 33 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 34 | R_useDynamicSymbols(dll, FALSE); 35 | } 36 | 37 | // Cross product of the jth column of x with y 38 | double crossprod(double *x, double *y, int n, int j) { 39 | double val = 0; 40 | int nn = n*j; 41 | for (int i=0; i val) val = x[i]; 73 | } 74 | return(val); 75 | } 76 | 77 | // Gaussian loss 78 | double gLoss(double *r, int n) { 79 | double l = 0; 80 | for (int i=0;i 10) { 87 | return(1); 88 | } else if (eta < -10) { 89 | return(0); 90 | } else { 91 | return(exp(eta)/(1+exp(eta))); 92 | } 93 | } 94 | 95 | // Euclidean norm 96 | double norm(double *x, int p) { 97 | double x_norm = 0; 98 | for (int j=0; j l) return(z-l); 106 | if (z < -l) return(z+l); 107 | return(0); 108 | } 109 | 110 | // Firm-thresholding operator 111 | double F(double z, double l1, double l2, double gamma) { 112 | double s=0; 113 | if (z > 0) s = 1; 114 | else if (z < 0) s = -1; 115 | if (fabs(z) <= l1) return(0); 116 | else if (fabs(z) <= gamma*l1*(1+l2)) return(s*(fabs(z)-l1)/(1+l2-1/gamma)); 117 | else return(z/(1+l2)); 118 | } 119 | 120 | // SCAD-modified firm-thresholding operator 121 | double Fs(double z, double l1, double l2, double gamma) { 122 | double s=0; 123 | if (z > 0) s = 1; 124 | else if (z < 0) s = -1; 125 | if (fabs(z) <= l1) return(0); 126 | else if (fabs(z) <= (l1*(1+l2)+l1)) return(s*(fabs(z)-l1)/(1+l2)); 127 | else if (fabs(z) <= gamma*l1*(1+l2)) return(s*(fabs(z)-gamma*l1/(gamma-1))/(1-1/(gamma-1)+l2)); 128 | else return(z/(1+l2)); 129 | } 130 | 131 | // MCP penalty 132 | double MCP(double theta, double l, double a) { 133 | theta = fabs(theta); 134 | if (theta <= a*l) return(l*theta - pow(theta,2)/(2*a)); 135 | else return(a*pow(l,2)/2); 136 | } 137 | 138 | // MCP penalization rate 139 | double dMCP(double theta, double l, double a) { 140 | theta = fabs(theta); 141 | if (theta < a*l) return(l-theta/a); 142 | else return(0); 143 | } 144 | -------------------------------------------------------------------------------- /src/maxprod.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | double crossprod(double *X, double *y, int n, int j); 8 | double norm(double *x, int p); 9 | 10 | SEXP maxprod(SEXP X_, SEXP y_, SEXP K_, SEXP m_) { 11 | 12 | // Declarations 13 | int n = nrows(X_); 14 | int J = length(K_)-1; 15 | SEXP zmax; 16 | PROTECT(zmax = 
allocVector(REALSXP, 1)); 17 | REAL(zmax)[0] = 0; 18 | double *X = REAL(X_); 19 | double *y = REAL(y_); 20 | double *m = REAL(m_); 21 | int *K = INTEGER(K_); 22 | 23 | for (int g=0; g REAL(zmax)[0]) REAL(zmax)[0] = z; 27 | } 28 | } 29 | 30 | // Return 31 | UNPROTECT(1); 32 | return(zmax); 33 | } 34 | 35 | SEXP maxgrad(SEXP X_, SEXP y_, SEXP K_, SEXP m_) { 36 | 37 | // Declarations 38 | int n = nrows(X_); 39 | int J = length(K_)-1; 40 | SEXP zmax; 41 | PROTECT(zmax = allocVector(REALSXP, 1)); 42 | REAL(zmax)[0] = 0; 43 | double *X = REAL(X_); 44 | double *y = REAL(y_); 45 | double *m = REAL(m_); 46 | int *K = INTEGER(K_); 47 | 48 | for (int g=0; g REAL(zmax)[0]) REAL(zmax)[0] = z; 56 | free(Z); 57 | } 58 | 59 | // Return 60 | UNPROTECT(1); 61 | return(zmax); 62 | } 63 | -------------------------------------------------------------------------------- /src/standardize.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | SEXP standardize(SEXP X_) { 9 | // Declarations 10 | int n = nrows(X_); 11 | int p = ncols(X_); 12 | SEXP XX_, c_, s_; 13 | PROTECT(XX_ = allocMatrix(REALSXP, n, p)); 14 | for (int j=0; j<(n*p); j++) REAL(XX_)[j] = 0; 15 | PROTECT(c_ = allocVector(REALSXP, p)); 16 | for (int j=0; j 8 | %\VignetteIndexEntry{Getting started with grpreg} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r setup, include=FALSE} 14 | library(grpreg) 15 | set.seed(4) 16 | knitr::opts_knit$set(aliases=c(h = 'fig.height', w = 'fig.width')) 17 | knitr::opts_chunk$set(comment="#", collapse=TRUE, cache=FALSE, tidy=FALSE) 18 | knitr::knit_hooks$set(small.mar = function(before, options, envir) { 19 | if (before) par(mar = c(4, 4, .1, .1)) 20 | }) 21 | ``` 22 | 23 | **[grpreg](https://pbreheny.github.io/grpreg/)** is an R package for fitting the regularization path of linear regression, GLM, and Cox regression models with grouped penalties. This includes group selection methods such as group lasso, group MCP, and group SCAD as well as bi-level selection methods such as the group exponential lasso, the composite MCP, and the group bridge. Utilities for carrying out cross-validation as well as post-fitting visualization, summarization, and prediction are also provided. 24 | 25 | **grpreg** comes with a few example data sets; we'll look at `Birthwt`, which involves identifying risk factors associated with low birth weight. The outcome can either be measured continuously (`bwt`, the birth weight in kilograms) or dichotomized (`low`) with respect to the newborn having a low birth weight (under 2.5 kg). 26 | 27 | ```{r Birthwt} 28 | data(Birthwt) 29 | X <- Birthwt$X 30 | y <- Birthwt$bwt 31 | head(X) 32 | ``` 33 | The original design matrix consisted of 8 variables, which have been expanded here into 16 features. For example, there are multiple indicator functions for race ("other" being the reference group) and several continuous factors such as age have been expanded using polynomial contrasts (splines would give a similar structure). Hence, the columns of the design matrix are *grouped*; this is what grpreg is designed for. 
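For grouped expansions of this kind, the package can also construct the design matrix for you: `expand_spline()` builds a grouped spline basis from a matrix of raw covariates, and the result can be passed straight to `grpreg()` with no separate `group` argument. A minimal sketch, using the package's own `gen_nonlinear_data()` to simulate raw covariates (these calls mirror the package's documented examples; see the additive-models article for details):

```{r expand}
# Expand each raw covariate into a grouped spline basis; the grouping is
# carried along inside the expand_spline object
Data <- gen_nonlinear_data(n=1000)
Xs <- expand_spline(Data$X)
fit_add <- grpreg(Xs, Data$y)
```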
The grouping information is encoded as follows: 34 | 35 | ```{r Birthwt_group} 36 | group <- Birthwt$group 37 | group 38 | ``` 39 | 40 | Here, groups are given as a factor; unique integer codes (which are essentially unlabeled factors) and character vectors are also allowed (character vectors do have some limitations, however, as the order of the groups is left unspecified, which can lead to ambiguity if you also try to set the `group.multiplier` option). To fit a group lasso model to this data set: 41 | 42 | ```{r fit} 43 | fit <- grpreg(X, y, group, penalty="grLasso") 44 | ``` 45 | 46 | We can then plot the coefficient paths with 47 | 48 | ```{r plot, h=4, w=6, small.mar=TRUE} 49 | plot(fit) 50 | ``` 51 | 52 | Notice that when a group enters the model (e.g., the green group), all of its coefficients become nonzero; this is what happens with group lasso models. To see what the coefficients are, we could use the `coef` function: 53 | 54 | ```{r coef} 55 | coef(fit, lambda=0.05) 56 | ``` 57 | 58 | Note that the number of physician's visits (`ftv`) is not included in the model at $\lambda=0.05$. 59 | 60 | Typically, one would carry out cross-validation to assess the predictive accuracy of the model at various values of $\lambda$. 61 | 62 | ```{r cvplot, h=5, w=6} 63 | cvfit <- cv.grpreg(X, y, group, penalty="grLasso") 64 | plot(cvfit) 65 | ``` 66 | The coefficients corresponding to the value of $\lambda$ that minimizes the cross-validation error can be obtained via `coef`: 67 | 68 | ```{r cv_coef} 69 | coef(cvfit) 70 | ``` 71 | 72 | Predicted values can be obtained via `predict`, which has a number of options: 73 | 74 | ```{r predict} 75 | predict(cvfit, X=head(X)) # Predictions for new observations 76 | predict(fit, type="ngroups", lambda=0.1) # Number of nonzero groups 77 | predict(fit, type="groups", lambda=0.1) # Identity of nonzero groups 78 | predict(fit, type="nvars", lambda=0.1) # Number of nonzero coefficients 79 | predict(fit, type="vars", lambda=0.1) # Identity of nonzero coefficients 80 | ``` 81 | 82 | Note that the original fit (to the full data set) is returned as `cvfit$fit`; it is not necessary to call both `grpreg` and `cv.grpreg` to analyze a data set. Several other penalties are available, as are methods for logistic regression and Cox proportional hazards regression; a brief sketch of these follows.
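Both use the same interface shown above. A minimal sketch, reusing the `Birthwt` objects from this vignette along with the package's `Lung` survival data (the calls mirror the package's documented examples):

```{r other_models}
# Group-penalized logistic regression on the dichotomized outcome
fit_logit <- grpreg(X, Birthwt$low, group, penalty="grLasso", family="binomial")

# Group-penalized Cox regression: grpsurv() takes a survival outcome
data(Lung)
fit_cox <- grpsurv(Lung$X, Lung$y, Lung$group)
```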
83 | -------------------------------------------------------------------------------- /vignettes/vignette.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #fff; 3 | margin: 1em auto; 4 | max-width: 700px; 5 | overflow: visible; 6 | padding-left: 2em; 7 | padding-right: 2em; 8 | margin-bottom: 50px; 9 | font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 10 | font-size: 14px; 11 | line-height: 1.35; 12 | } 13 | table { 14 | margin: 1em auto; 15 | border-width: 1px; 16 | border-color: #DDDDDD; 17 | border-style: outset; 18 | border-collapse: collapse; 19 | } 20 | table th { 21 | border-width: 2px; 22 | padding: 5px; 23 | border-style: inset; 24 | } 25 | table td { 26 | border-width: 1px; 27 | border-style: inset; 28 | line-height: 18px; 29 | padding: 5px 5px; 30 | } 31 | table, table th, table td { 32 | border-left-style: none; 33 | border-right-style: none; 34 | } 35 | table thead, table .even { 36 | background-color: #f7f7f7; 37 | } 38 | p { 39 | margin: 0.5em 0; 40 | } 41 | blockquote { 42 | background-color: #f6f6f6; 43 | padding: 0.25em 0.75em; 44 | } 45 | hr { 46 | border-style: solid; 47 | border: none; 48 | border-top: 1px solid #777; 49 | margin: 28px 0; 50 | } 51 | dl { 52 | margin-left: 0; 53 | } 54 | dl dd { 55 | margin-bottom: 13px; 56 | margin-left: 13px; 57 | } 58 | dl dt { 59 | font-weight: bold; 60 | } 61 | ul { 62 | margin-top: 0; 63 | } 64 | ul li { 65 | list-style: circle outside; 66 | } 67 | ul ul { 68 | margin-bottom: 0; 69 | } 70 | pre, code { 71 | background-color: #f7f7f7; 72 | border-radius: 3px; 73 | color: #333; 74 | white-space: pre-wrap; 75 | } 76 | pre { 77 | border: 1px solid #eee; 78 | border-radius: 3px; 79 | margin: 5px 0px 10px 0px; 80 | padding: 10px; 81 | } 82 | pre:not([class]) { 83 | background-color: #f7f7f7; 84 | } 85 | code { 86 | font-family: Consolas, Monaco, 'Courier New', monospace; 87 | font-size: 85%; 88 | } 89 | p > code, li > code { 90 | padding: 2px 0; 91 | } 92 | .figure { 93 | text-align: center; 94 | } 95 | img { 96 | background-color: #FFFFFF; 97 | padding: 2px; 98 | display: block; 99 | margin: auto; 100 | } 101 | h1 { 102 | margin-top: 0; 103 | font-size: 35px; 104 | line-height: 40px; 105 | } 106 | h2 { 107 | border-bottom: 4px solid #f7f7f7; 108 | padding-top: 10px; 109 | padding-bottom: 2px; 110 | font-size: 145%; 111 | } 112 | h3 { 113 | border-bottom: 2px solid #f7f7f7; 114 | padding-top: 10px; 115 | font-size: 120%; 116 | } 117 | h4 { 118 | border-bottom: 1px solid #f7f7f7; 119 | margin-left: 8px; 120 | font-size: 105%; 121 | } 122 | h5, h6 { 123 | border-bottom: 1px solid #ccc; 124 | font-size: 105%; 125 | } 126 | a { 127 | color: #008dff; 128 | text-decoration: none; 129 | } 130 | a:hover { 131 | color: #00b500; 132 | } 133 | 134 | a.sourceLine:hover { 135 | text-decoration: none; 136 | } 137 | code span.dt {color: #000000;} 138 | code span.fl {color: #1514b5;} 139 | code span.fu {color: #000000;} 140 | code span.ch,code span.st {color: #036a07;} 141 | code span.kw {color: #375f84;} 142 | code span.co {color: #888888;} 143 | code span.message { color: black; font-weight: bolder;} 144 | code span.error { color: orange; font-weight: bolder;} 145 | code span.warning { color: #6A0366; font-weight: bolder;} 146 | --------------------------------------------------------------------------------